/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
1
# Copyright (C) 2008 Canonical Limited.
2
# 
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License version 2 as published
5
# by the Free Software Foundation.
6
# 
7
# This program is distributed in the hope that it will be useful,
8
# but WITHOUT ANY WARRANTY; without even the implied warranty of
9
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
# GNU General Public License for more details.
11
# 
12
# You should have received a copy of the GNU General Public License
13
# along with this program; if not, write to the Free Software
14
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
15
# 
16
17
"""Compiled extensions for doing compression."""
18
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
19
cdef extern from *:
20
    ctypedef unsigned long size_t
21
    void * malloc(size_t)
0.18.23 by John Arbash Meinel
Now we can add more lines without having to rebuild the whole hash
22
    void * realloc(void *, size_t)
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
23
    void free(void *)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
24
    void memcpy(void *, void *, size_t)
25
26
cdef extern from "delta.h":
27
    struct delta_index:
28
        unsigned long memsize
29
        void *src_buf
30
        unsigned long src_size
31
        unsigned int hash_mask
32
        # struct index_entry *hash[]
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
33
    delta_index * create_delta_index(void *buf, unsigned long bufsize, unsigned
34
                                     long agg_src_offset)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
35
    void free_delta_index(delta_index *index)
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
36
    void *create_delta(delta_index **indexes,
37
             unsigned int num_indexes,
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
38
             void *buf, unsigned long bufsize,
39
             unsigned long *delta_size, unsigned long max_delta_size)
40
    unsigned long get_delta_hdr_size(unsigned char **datap,
41
                                     unsigned char *top)
42
    Py_ssize_t DELTA_SIZE_MIN
0.23.7 by John Arbash Meinel
Add a apply_delta2 function, just in case it matters.
43
    void *patch_delta(void *src_buf, unsigned long src_size,
44
                      void *delta_buf, unsigned long delta_size,
45
                      unsigned long *dst_size)
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
46
47
cdef extern from "Python.h":
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
48
    int PyString_CheckExact(object)
49
    char * PyString_AS_STRING(object)
50
    Py_ssize_t PyString_GET_SIZE(object)
51
    object PyString_FromStringAndSize(char *, Py_ssize_t)
52
53
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
54
cdef void *safe_malloc(size_t count) except NULL:
    # Allocate `count` bytes with malloc() and hand the block back to the
    # caller.  A NULL result from malloc() is turned into a MemoryError, so
    # callers never have to test the returned pointer themselves.
    cdef void *block
    block = malloc(count)
    if block != NULL:
        return block
    raise MemoryError('Failed to allocate %d bytes of memory' % (count,))
60
61
62
cdef void *safe_realloc(void * old, size_t count) except NULL:
    # Resize the block `old` to `count` bytes with realloc() and return the
    # (possibly moved) block.  A NULL result from realloc() is reported as a
    # MemoryError instead of being returned to the caller.
    cdef void *resized
    resized = realloc(old, count)
    if resized != NULL:
        return resized
    raise MemoryError('Failed to reallocate to %d bytes of memory'
                      % (count,))
69
70
71
cdef int safe_free(void **val) except -1:
    # Free the block that `val` points at (if any) and reset the caller's
    # pointer to NULL so it cannot be freed or dereferenced a second time.
    # `val` itself must not be NULL.  Returns 0; -1 is reserved for signalling
    # an exception (the failed assertion).
    cdef void *block
    assert val != NULL
    block = val[0]
    if block == NULL:
        # Nothing was allocated, so there is nothing to release.
        return 0
    free(block)
    val[0] = NULL
    return 0
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
76
0.23.17 by John Arbash Meinel
Create a wrapper function, so that lsprof will properly attribute time spent.
77
def make_delta_index(source):
    """Build and return a DeltaIndex for ``source``.

    This is a plain function wrapper around the DeltaIndex constructor;
    ``source`` is passed through unchanged.
    """
    index = DeltaIndex(source)
    return index
79
80
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
81
cdef class DeltaIndex:
    """A growable collection of C ``delta_index`` structures.

    Each call to add_source() builds one more index with
    create_delta_index(); make_delta() then hands every index collected so
    far to create_delta() to produce a delta for a target string.
    """

    # The source strings given to add_source(), in the order they were added.
    # NOTE(review): presumably held so that the buffers passed to
    # create_delta_index() stay alive as long as the indexes -- confirm
    # against delta.h.
    #cdef list _sources
    cdef readonly object _sources
    # C array of index pointers: _max_num_indexes slots are allocated and the
    # first _num_indexes of them are in use.
    cdef delta_index **_indexes
    cdef readonly unsigned int _num_indexes
    cdef readonly unsigned int _max_num_indexes
    # Running byte offset that is passed to create_delta_index() as
    # agg_src_offset for the next source.
    cdef public unsigned long _source_offset

    def __repr__(self):
        return '%s(%d, %d, %d)' % (self.__class__.__name__,
            len(self._sources), self._source_offset,
            self._num_indexes)

    def __init__(self, source=None):
        """Create an empty collection, optionally seeded with one source.

        :param source: If not None, a byte string that is added with
            add_source(source, 0).
        :raises MemoryError: If the pointer array cannot be allocated.
        """
        cdef unsigned int initial_slots

        # __init__ can be invoked again on a live instance; release whatever
        # an earlier call allocated so the pointer array is not leaked.  On a
        # fresh instance _indexes is still NULL and this does nothing.
        self._ensure_no_indexes()

        initial_slots = 1024
        self._sources = []
        self._num_indexes = 0
        self._source_offset = 0
        self._indexes = <delta_index**>safe_malloc(sizeof(delta_index*)
                                                   * initial_slots)
        # Only record the capacity once the allocation has succeeded.
        self._max_num_indexes = initial_slots

        if source is not None:
            self.add_source(source, 0)

    def __dealloc__(self):
        self._ensure_no_indexes()

    def add_source(self, source, unadded_bytes):
        """Add a new bit of source text to the delta indexes.

        :param source: The text in question, this must be a byte string
        :param unadded_bytes: Assume there are this many bytes that didn't get
            added between this source and the end of the previous source.
        :raises TypeError: If source is not exactly a str.
        :raises MemoryError: If the pointer array cannot be grown.
        """
        cdef char *c_source
        cdef Py_ssize_t c_source_size
        cdef delta_index *index
        cdef unsigned int num_indexes
        cdef unsigned long agg_src_offset

        if not PyString_CheckExact(source):
            raise TypeError('source is not a str')

        # Reserve the slot before touching any other state.  If growing the
        # array fails, the object is left unchanged and no freshly built
        # index is leaked.
        num_indexes = self._num_indexes + 1
        if num_indexes >= self._max_num_indexes:
            self._expand_indexes()

        self._sources.append(source)
        c_source = PyString_AS_STRING(source)
        c_source_size = PyString_GET_SIZE(source)

        # TODO: Our usage is ultimately going to be different than the one that
        #       was originally designed. Specifically, we are going to want to
        #       be able to update the index by hashing future data. It should
        #       fit just fine into the structure. But for now, we just wrap
        #       create_delta_index (For example, we could always reserve enough
        #       space to hash a 4MB string, etc.)
        agg_src_offset = self._source_offset + unadded_bytes
        index = create_delta_index(c_source, c_source_size, agg_src_offset)
        # The offset advances even when no index was produced, so later
        # sources are still positioned after this one.
        self._source_offset = agg_src_offset + c_source_size
        if index != NULL:
            self._indexes[self._num_indexes] = index
            self._num_indexes = num_indexes

    cdef _expand_indexes(self):
        # Double the capacity of the _indexes pointer array.
        cdef unsigned int new_max
        cdef void *expanded

        new_max = self._max_num_indexes * 2
        if new_max == 0:
            # Doubling a zero capacity (e.g. __init__ never ran) would
            # allocate nothing; fall back to the initial size instead.
            new_max = 1024
        expanded = safe_realloc(self._indexes,
                                sizeof(delta_index *) * new_max)
        # safe_realloc raised if it failed, so the old array and the old
        # capacity are only replaced after a successful resize.
        self._indexes = <delta_index **>expanded
        self._max_num_indexes = new_max

    cdef _ensure_no_indexes(self):
        # Free every stored delta_index and the pointer array itself, then
        # reset the counters.  Safe to call when nothing is allocated.
        cdef unsigned int i

        if self._indexes != NULL:
            for i from 0 <= i < self._num_indexes:
                free_delta_index(self._indexes[i])
                self._indexes[i] = NULL
            free(self._indexes)
            self._indexes = NULL
            self._max_num_indexes = 0
            self._num_indexes = 0

    def make_delta(self, target_bytes, max_delta_size=0):
        """Create a delta from the current source to the target bytes.

        :param target_bytes: The text to encode, this must be a byte string.
        :param max_delta_size: Passed straight through to create_delta().
        :return: The delta as a str, or None when no index has been added
            or create_delta() did not produce a delta.
        :raises TypeError: If target_bytes is not exactly a str.
        """
        cdef char *target
        cdef Py_ssize_t target_size
        cdef void * delta
        cdef unsigned long delta_size

        if self._num_indexes == 0:
            return None

        if not PyString_CheckExact(target_bytes):
            raise TypeError('target is not a str')

        target = PyString_AS_STRING(target_bytes)
        target_size = PyString_GET_SIZE(target_bytes)

        # TODO: inline some of create_delta so we at least don't have to double
        #       malloc, and can instead use PyString_FromStringAndSize, to
        #       allocate the bytes into the final string
        delta = create_delta(self._indexes, self._num_indexes,
                             target, target_size,
                             &delta_size, max_delta_size)
        result = None
        if delta != NULL:
            try:
                result = PyString_FromStringAndSize(<char *>delta, delta_size)
            finally:
                # Release the C buffer even if building the str failed.
                free(delta)
        return result
190
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
191
192
def make_delta(source_bytes, target_bytes):
    """Create a delta from source_bytes => target_bytes.

    :param source_bytes: The base text, this must be a byte string.
    :param target_bytes: The text to encode, this must be a byte string.
    :return: The delta as a str, or None when create_delta_index() or
        create_delta() returned NULL.
    :raises TypeError: If either argument is not exactly a str.
    """
    cdef char *source
    cdef Py_ssize_t source_size
    cdef char *target
    cdef Py_ssize_t target_size
    cdef delta_index *index
    cdef void * delta
    cdef unsigned long delta_size
    cdef unsigned long max_delta_size

    max_delta_size = 0 # Unlimited

    if not PyString_CheckExact(source_bytes):
        raise TypeError('source is not a str')
    if not PyString_CheckExact(target_bytes):
        raise TypeError('target is not a str')

    source = PyString_AS_STRING(source_bytes)
    source_size = PyString_GET_SIZE(source_bytes)
    target = PyString_AS_STRING(target_bytes)
    target_size = PyString_GET_SIZE(target_bytes)

    result = None
    index = create_delta_index(source, source_size, 0)
    if index != NULL:
        delta = create_delta(&index, 1, target, target_size,
                             &delta_size, max_delta_size)
        # The index is only needed while create_delta() runs.
        free_delta_index(index)
        if delta != NULL:
            try:
                result = PyString_FromStringAndSize(<char *>delta, delta_size)
            finally:
                # Release the C buffer even if building the str failed.
                free(delta)
    return result
225
226
227
def apply_delta(source_bytes, delta_bytes):
    """Apply a delta generated by make_delta to source_bytes.

    :param source_bytes: The base text, this must be a byte string. It may
        be longer than the source size recorded in the delta header.
    :param delta_bytes: The delta to replay, this must be a byte string.
    :return: The reconstructed text as a str.
    :raises TypeError: If either argument is not exactly a str.
    :raises RuntimeError: If the delta is too short, expects a longer
        source, contains an instruction that runs outside the source, the
        delta or the output, uses the reserved opcode 0, or does not
        produce exactly the number of bytes announced in its header.
    """
    cdef char *source
    cdef Py_ssize_t source_size
    cdef char *delta
    cdef Py_ssize_t delta_size
    cdef unsigned char *data, *top
    cdef unsigned char *dst_buf, *out, cmd
    cdef unsigned char flags
    cdef Py_ssize_t size
    cdef Py_ssize_t operand_bytes
    cdef unsigned long cp_off, cp_size

    if not PyString_CheckExact(source_bytes):
        raise TypeError('source is not a str')
    if not PyString_CheckExact(delta_bytes):
        raise TypeError('delta is not a str')

    source = PyString_AS_STRING(source_bytes)
    source_size = PyString_GET_SIZE(source_bytes)
    delta = PyString_AS_STRING(delta_bytes)
    delta_size = PyString_GET_SIZE(delta_bytes)

    # Code taken from patch-delta.c, only brought here to give better error
    # handling, and to avoid double allocating memory
    if (delta_size < DELTA_SIZE_MIN):
        # XXX: Invalid delta block
        raise RuntimeError('delta_size %d smaller than min delta size %d'
                           % (delta_size, DELTA_SIZE_MIN))

    data = <unsigned char *>delta
    top = data + delta_size

    # make sure the orig file size matches what we expect
    # XXX: gcc warns because data isn't defined as 'const'
    size = get_delta_hdr_size(&data, top)
    if (size > source_size):
        # XXX: mismatched source size
        raise RuntimeError('source size %d < expected source size %d'
                           % (source_size, size))
    # From here on only the part of the source the delta knows about may be
    # referenced by copy instructions.
    source_size = size

    # now the result size
    size = get_delta_hdr_size(&data, top)
    result = PyString_FromStringAndSize(NULL, size)
    dst_buf = <unsigned char*>PyString_AS_STRING(result)
    # XXX: The original code added a trailing null here, but this shouldn't be
    #      necessary when using PyString_FromStringAndSize
    # dst_buf[size] = 0

    # `size` counts down the output bytes that are still to be produced.
    out = dst_buf
    while (data < top):
        cmd = data[0]
        data = data + 1
        if (cmd & 0x80):
            # Copy instruction: each of the low 7 bits announces one operand
            # byte.  Make sure all of them lie inside the delta before
            # reading any, so a truncated delta cannot cause an over-read.
            operand_bytes = 0
            flags = cmd & 0x7f
            while flags != 0:
                operand_bytes = operand_bytes + (flags & 0x01)
                flags = flags >> 1
            if (operand_bytes > (top - data)):
                raise RuntimeError('Copy instruction truncated:'
                    ' needs %d operand bytes, only %d left in delta'
                    % (operand_bytes, <int>(top - data)))
            cp_off = cp_size = 0
            # The operands are widened before shifting: shifting the
            # int-promoted byte by 24 could set the sign bit and then
            # sign-extend into a 64-bit unsigned long.
            if (cmd & 0x01):
                cp_off = data[0]
                data = data + 1
            if (cmd & 0x02):
                cp_off = cp_off | ((<unsigned long>data[0]) << 8)
                data = data + 1
            if (cmd & 0x04):
                cp_off = cp_off | ((<unsigned long>data[0]) << 16)
                data = data + 1
            if (cmd & 0x08):
                cp_off = cp_off | ((<unsigned long>data[0]) << 24)
                data = data + 1
            if (cmd & 0x10):
                cp_size = data[0]
                data = data + 1
            if (cmd & 0x20):
                cp_size = cp_size | ((<unsigned long>data[0]) << 8)
                data = data + 1
            if (cmd & 0x40):
                cp_size = cp_size | ((<unsigned long>data[0]) << 16)
                data = data + 1
            if (cp_size == 0):
                # An encoded size of zero stands for 0x10000 bytes.
                cp_size = 0x10000
            # Reject wrap-around, reads past the end of the source and
            # writes past the end of the output.
            if (cp_off + cp_size < cp_size or
                cp_off + cp_size > source_size or
                cp_size > size):
                raise RuntimeError('Something wrong with:'
                    ' cp_off = %s, cp_size = %s'
                    ' source_size = %s, size = %s'
                    % (cp_off, cp_size, source_size, size))
            memcpy(out, source + cp_off, cp_size)
            out = out + cp_size
            size = size - cp_size
        elif (cmd):
            # Insert instruction: the next `cmd` bytes of the delta are
            # literal output.
            if (cmd > size):
                raise RuntimeError('Insert instruction longer than remaining'
                    ' bytes: %d > %d' % (cmd, size))
            if (cmd > (top - data)):
                # The literal bytes must all be inside the delta itself.
                raise RuntimeError('Insert instruction longer than remaining'
                    ' delta bytes: %d > %d' % (cmd, <int>(top - data)))
            memcpy(out, data, cmd)
            out = out + cmd
            data = data + cmd
            size = size - cmd
        else:
            # /*
            #  * cmd == 0 is reserved for future encoding
            #  * extensions. In the mean time we must fail when
            #  * encountering them (might be data corruption).
            #  */
            ## /* XXX: error("unexpected delta opcode 0"); */
            raise RuntimeError('Got delta opcode: 0, not supported')

    # /* sanity check */
    if (data != top or size != 0):
        ## /* XXX: error("delta replay has gone wild"); */
        raise RuntimeError('Did not extract the number of bytes we expected'
            ' we were left with %d bytes in "size", and top - data = %d'
            % (size, <int>(top - data)))

    # *dst_size = out - dst_buf;
    assert (out - dst_buf) == PyString_GET_SIZE(result)
    return result
0.23.7 by John Arbash Meinel
Add a apply_delta2 function, just in case it matters.
342
343
344
def apply_delta2(source_bytes, delta_bytes):
    """Apply a delta generated by make_delta to source_bytes.

    :param source_bytes: The base text, this must be a byte string.
    :param delta_bytes: The delta to replay, this must be a byte string.
    :return: The reconstructed text as a str, or None when patch_delta()
        returned NULL.
    :raises TypeError: If either argument is not exactly a str.
    """
    # This defers to the patch-delta code rather than implementing it here
    # If this is faster, we can bring the memory allocation and error handling
    # into apply_delta(), and leave the primary loop in a separate C func.
    cdef char *source, *delta, *target
    cdef Py_ssize_t source_size, delta_size
    cdef unsigned long target_size

    if not PyString_CheckExact(source_bytes):
        raise TypeError('source is not a str')
    if not PyString_CheckExact(delta_bytes):
        raise TypeError('delta is not a str')

    source = PyString_AS_STRING(source_bytes)
    source_size = PyString_GET_SIZE(source_bytes)
    delta = PyString_AS_STRING(delta_bytes)
    delta_size = PyString_GET_SIZE(delta_bytes)

    target = <char *>patch_delta(source, source_size,
                                 delta, delta_size,
                                 &target_size)
    if target == NULL:
        return None
    try:
        result = PyString_FromStringAndSize(target, target_size)
    finally:
        # Release the C buffer even if building the str failed.
        free(target)
    return result