/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
1
# Copyright (C) 2008 Canonical Limited.
2
# 
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License version 2 as published
5
# by the Free Software Foundation.
6
# 
7
# This program is distributed in the hope that it will be useful,
8
# but WITHOUT ANY WARRANTY; without even the implied warranty of
9
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
# GNU General Public License for more details.
11
# 
12
# You should have received a copy of the GNU General Public License
13
# along with this program; if not, write to the Free Software
14
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
15
# 
16
17
"""Compiled extensions for doing compression."""
18
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
19
cdef extern from *:
20
    ctypedef unsigned long size_t
21
    void * malloc(size_t)
0.18.23 by John Arbash Meinel
Now we can add more lines without having to rebuild the whole hash
22
    void * realloc(void *, size_t)
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
23
    void free(void *)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
24
    void memcpy(void *, void *, size_t)
25
26
cdef extern from "delta.h":
0.23.42 by John Arbash Meinel
Change the code around again.
27
    struct source_info:
28
        void *buf
29
        unsigned long size
30
        unsigned long agg_offset
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
31
    struct delta_index:
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
32
        pass
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
33
    delta_index * create_delta_index(source_info *src, delta_index *old)
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
34
    delta_index * create_delta_index_from_delta(source_info *delta,
35
                                                delta_index *old)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
36
    void free_delta_index(delta_index *index)
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
37
    void *create_delta(delta_index *indexes,
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
38
             void *buf, unsigned long bufsize,
39
             unsigned long *delta_size, unsigned long max_delta_size)
40
    unsigned long get_delta_hdr_size(unsigned char **datap,
41
                                     unsigned char *top)
42
    Py_ssize_t DELTA_SIZE_MIN
0.23.7 by John Arbash Meinel
Add a apply_delta2 function, just in case it matters.
43
    void *patch_delta(void *src_buf, unsigned long src_size,
44
                      void *delta_buf, unsigned long delta_size,
45
                      unsigned long *dst_size)
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
46
47
cdef extern from "Python.h":
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
48
    int PyString_CheckExact(object)
49
    char * PyString_AS_STRING(object)
50
    Py_ssize_t PyString_GET_SIZE(object)
51
    object PyString_FromStringAndSize(char *, Py_ssize_t)
52
53
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
54
cdef void *safe_malloc(size_t count) except NULL:
    # Wrapper around malloc() that reports failure as a Python MemoryError
    # instead of a NULL pointer, so callers never need to test the result.
    cdef void *mem
    mem = malloc(count)
    if mem != NULL:
        return mem
    raise MemoryError('Failed to allocate %d bytes of memory' % (count,))
60
61
62
cdef void *safe_realloc(void * old, size_t count) except NULL:
    # Wrapper around realloc(): ask for 'old' to be resized to 'count' bytes
    # and raise MemoryError when realloc() hands back NULL.
    cdef void *mem
    mem = realloc(old, count)
    if mem != NULL:
        return mem
    raise MemoryError('Failed to reallocate to %d bytes of memory'
                      % (count,))
69
70
71
cdef int safe_free(void **val) except -1:
    # Release the allocation that 'val' points at, if there is one, and reset
    # the caller's pointer to NULL so the same memory is not freed twice.
    assert val != NULL
    if val[0] == NULL:
        # Nothing allocated, nothing to do.
        return 0
    free(val[0])
    val[0] = NULL
    return 0
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
76
0.23.17 by John Arbash Meinel
Create a wrapper function, so that lsprof will properly attribute time spent.
77
def make_delta_index(source):
    """Build and return a DeltaIndex for source.

    This is a module-level wrapper around the DeltaIndex constructor; source
    is passed straight through to it.
    """
    index = DeltaIndex(source)
    return index
79
80
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
81
cdef class DeltaIndex:
    """Python wrapper around a C delta_index built from source strings.

    Sources are added with add_source() / add_delta_source(); each one gets a
    source_info slot whose buf pointer refers to the Python string's own
    buffer, and the string itself is stored in _sources. make_delta() then
    produces a delta for a target against everything indexed so far.
    """

    # We need Pyrex 0.9.8+ to understand a 'list' definition, and this object
    # isn't performance critical
    # cdef readonly list _sources
    cdef readonly object _sources
    # C array with room for _max_num_sources source_info entries.
    cdef source_info *_source_infos
    # Current combined index, or NULL if nothing has been indexed yet.
    cdef delta_index *_index
    cdef readonly unsigned int _max_num_sources
    # agg_offset + size of the most recently added source.
    cdef public unsigned long _source_offset

    def __repr__(self):
        return '%s(%d, %d)' % (self.__class__.__name__,
            len(self._sources), self._source_offset)

    def __init__(self, source=None):
        """Create an index, optionally seeded with one source text.

        :param source: If not None, a byte string that is immediately added
            via add_source(source, 0).
        """
        self._sources = []
        self._index = NULL
        self._max_num_sources = 4096
        self._source_infos = <source_info *>safe_malloc(sizeof(source_info)
                                                        * self._max_num_sources)
        self._source_offset = 0

        if source is not None:
            self.add_source(source, 0)

    def __dealloc__(self):
        if self._index != NULL:
            free_delta_index(self._index)
            self._index = NULL
        safe_free(<void **>&self._source_infos)

    def add_delta_source(self, delta, unadded_bytes):
        """Add a new delta to the source texts.

        :param delta: The text of the delta, this must be a byte string.
        :param unadded_bytes: Number of bytes that were added to the source
            that were not indexed.
        :raises TypeError: If delta is not an exact str.
        :raises RuntimeError: If create_delta_index_from_delta returns NULL.
        """
        cdef char *c_delta
        cdef Py_ssize_t c_delta_size
        cdef delta_index *index
        cdef unsigned int source_location
        cdef source_info *src

        if not PyString_CheckExact(delta):
            raise TypeError('delta is not a str')

        source_location = len(self._sources)
        if source_location >= self._max_num_sources:
            self._expand_sources()
        self._sources.append(delta)
        c_delta = PyString_AS_STRING(delta)
        c_delta_size = PyString_GET_SIZE(delta)
        src = self._source_infos + source_location
        src.buf = c_delta
        src.size = c_delta_size
        src.agg_offset = self._source_offset + unadded_bytes
        index = create_delta_index_from_delta(src, self._index)
        # The offset is advanced before the result is checked, matching
        # add_source().
        self._source_offset = src.agg_offset + src.size
        if index == NULL:
            raise RuntimeError('got back failure for adding: %r' % delta)
        else:
            free_delta_index(self._index)
            self._index = index

    def add_source(self, source, unadded_bytes):
        """Add a new bit of source text to the delta indexes.

        :param source: The text in question, this must be a byte string
        :param unadded_bytes: Assume there are this many bytes that didn't get
            added between this source and the end of the previous source.
        :raises TypeError: If source is not an exact str.
        """
        cdef char *c_source
        cdef Py_ssize_t c_source_size
        cdef delta_index *index
        cdef unsigned int source_location
        cdef source_info *src

        if not PyString_CheckExact(source):
            raise TypeError('source is not a str')

        source_location = len(self._sources)
        if source_location >= self._max_num_sources:
            self._expand_sources()
        self._sources.append(source)
        c_source = PyString_AS_STRING(source)
        c_source_size = PyString_GET_SIZE(source)
        src = self._source_infos + source_location
        src.buf = c_source
        src.size = c_source_size

        src.agg_offset = self._source_offset + unadded_bytes
        index = create_delta_index(src, self._index)
        self._source_offset = src.agg_offset + src.size
        # A NULL result is not treated as an error here: the existing index
        # (possibly NULL) is simply kept.
        if index != NULL:
            free_delta_index(self._index)
            self._index = index

    cdef _expand_sources(self):
        # Growing the array is not supported yet, so this always raises. The
        # statements after the raise are unreachable and only record the
        # intended implementation.
        raise RuntimeError('if we move self._source_infos, then we need to'
                           ' change all of the index pointers as well.')
        self._max_num_sources = self._max_num_sources * 2
        self._source_infos = <source_info *>safe_realloc(self._source_infos,
                                                sizeof(source_info)
                                                * self._max_num_sources)

    def make_delta(self, target_bytes, max_delta_size=0):
        """Create a delta from the current source to the target bytes.

        :param target_bytes: The text to produce, this must be a byte string.
        :param max_delta_size: Passed through to create_delta unchanged.
        :return: The delta as a str, or None if there is no index yet or
            create_delta returned NULL.
        :raises TypeError: If target_bytes is not an exact str.
        """
        cdef char *target
        cdef Py_ssize_t target_size
        cdef void * delta
        cdef unsigned long delta_size

        if self._index == NULL:
            return None

        if not PyString_CheckExact(target_bytes):
            raise TypeError('target is not a str')

        target = PyString_AS_STRING(target_bytes)
        target_size = PyString_GET_SIZE(target_bytes)

        # TODO: inline some of create_delta so we at least don't have to double
        #       malloc, and can instead use PyString_FromStringAndSize, to
        #       allocate the bytes into the final string
        delta = create_delta(self._index,
                             target, target_size,
                             &delta_size, max_delta_size)
        result = None
        if delta != NULL:
            # The C buffer is ours to release; do so even if building the
            # Python string raises (e.g. MemoryError), otherwise it leaks.
            try:
                result = PyString_FromStringAndSize(<char *>delta, delta_size)
            finally:
                free(delta)
        return result
217
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
218
219
def make_delta(source_bytes, target_bytes):
    """Compute a delta that turns source_bytes into target_bytes.

    Convenience wrapper: a throwaway DeltaIndex is built from source_bytes
    and its make_delta method is called with target_bytes.
    """
    return DeltaIndex(source_bytes).make_delta(target_bytes)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
223
224
225
def apply_delta(source_bytes, delta_bytes):
    """Apply a delta generated by make_delta to source_bytes.

    :param source_bytes: The text the delta was computed against; must be an
        exact str. It may be longer than the source size recorded in the
        delta header, in which case only that many leading bytes are usable.
    :param delta_bytes: The delta to apply; must be an exact str.
    :return: A new str containing the reconstructed target.
    :raises TypeError: If either argument is not an exact str.
    :raises RuntimeError: If the delta is shorter than DELTA_SIZE_MIN, expects
        more source than was supplied, is truncated, uses opcode 0, or does
        not produce exactly the number of bytes its header promised.
    """
    cdef char *source
    cdef Py_ssize_t source_size
    cdef char *delta
    cdef Py_ssize_t delta_size
    cdef unsigned char *data, *top
    cdef unsigned char *dst_buf, *out, cmd
    cdef Py_ssize_t size
    cdef unsigned long cp_off, cp_size
    cdef unsigned int operand_bits
    cdef Py_ssize_t operand_count, remaining

    if not PyString_CheckExact(source_bytes):
        raise TypeError('source is not a str')
    if not PyString_CheckExact(delta_bytes):
        raise TypeError('delta is not a str')

    source = PyString_AS_STRING(source_bytes)
    source_size = PyString_GET_SIZE(source_bytes)
    delta = PyString_AS_STRING(delta_bytes)
    delta_size = PyString_GET_SIZE(delta_bytes)

    # Code taken from patch-delta.c, only brought here to give better error
    # handling, and to avoid double allocating memory
    if (delta_size < DELTA_SIZE_MIN):
        # XXX: Invalid delta block
        raise RuntimeError('delta_size %d smaller than min delta size %d'
                           % (delta_size, DELTA_SIZE_MIN))

    data = <unsigned char *>delta
    top = data + delta_size

    # make sure the orig file size matches what we expect
    # XXX: gcc warns because data isn't defined as 'const'
    size = get_delta_hdr_size(&data, top)
    if (size > source_size):
        # XXX: mismatched source size
        raise RuntimeError('source size %d < expected source size %d'
                           % (source_size, size))
    # From here on only the part of the source the delta knows about may be
    # copied from.
    source_size = size

    # now the result size
    size = get_delta_hdr_size(&data, top)
    result = PyString_FromStringAndSize(NULL, size)
    dst_buf = <unsigned char*>PyString_AS_STRING(result)
    # XXX: The original code added a trailing null here, but this shouldn't be
    #      necessary when using PyString_FromStringAndSize
    # dst_buf[size] = 0

    # 'size' counts down the output bytes still to be produced.
    out = dst_buf
    while (data < top):
        cmd = data[0]
        data = data + 1
        if (cmd & 0x80):
            # Copy instruction. Each of the low seven bits of cmd announces
            # one operand byte; make sure all of them lie inside the delta
            # before reading any.
            operand_count = 0
            operand_bits = cmd & 0x7f
            while operand_bits:
                operand_count = operand_count + (operand_bits & 1)
                operand_bits = operand_bits >> 1
            remaining = top - data
            if (operand_count > remaining):
                raise RuntimeError('Copy instruction truncated: needs %d'
                    ' operand bytes, only %d left in delta'
                    % (operand_count, remaining))
            cp_off = cp_size = 0
            # Widen each byte to unsigned long before shifting, so that a
            # high byte >= 0x80 is not sign-extended into cp_off.
            if (cmd & 0x01):
                cp_off = data[0]
                data = data + 1
            if (cmd & 0x02):
                cp_off = cp_off | ((<unsigned long>data[0]) << 8)
                data = data + 1
            if (cmd & 0x04):
                cp_off = cp_off | ((<unsigned long>data[0]) << 16)
                data = data + 1
            if (cmd & 0x08):
                cp_off = cp_off | ((<unsigned long>data[0]) << 24)
                data = data + 1
            if (cmd & 0x10):
                cp_size = data[0]
                data = data + 1
            if (cmd & 0x20):
                cp_size = cp_size | ((<unsigned long>data[0]) << 8)
                data = data + 1
            if (cmd & 0x40):
                cp_size = cp_size | ((<unsigned long>data[0]) << 16)
                data = data + 1
            if (cp_size == 0):
                # An encoded size of zero stands for 0x10000 bytes.
                cp_size = 0x10000
            # Reject arithmetic wrap-around, copies reaching past the usable
            # source, and copies larger than the remaining output.
            if (cp_off + cp_size < cp_size or
                cp_off + cp_size > source_size or
                cp_size > size):
                raise RuntimeError('Something wrong with:'
                    ' cp_off = %s, cp_size = %s'
                    ' source_size = %s, size = %s'
                    % (cp_off, cp_size, source_size, size))
            memcpy(out, source + cp_off, cp_size)
            out = out + cp_size
            size = size - cp_size
        elif (cmd):
            # Insert instruction: the next 'cmd' bytes of the delta are
            # literal output.
            if (cmd > size):
                raise RuntimeError('Insert instruction longer than remaining'
                    ' bytes: %d > %d' % (cmd, size))
            remaining = top - data
            if (cmd > remaining):
                # The literal data would run off the end of the delta.
                raise RuntimeError('Insert instruction longer than remaining'
                    ' delta: %d > %d' % (cmd, remaining))
            memcpy(out, data, cmd)
            out = out + cmd
            data = data + cmd
            size = size - cmd
        else:
            # /*
            #  * cmd == 0 is reserved for future encoding
            #  * extensions. In the mean time we must fail when
            #  * encountering them (might be data corruption).
            #  */
            ## /* XXX: error("unexpected delta opcode 0"); */
            raise RuntimeError('Got delta opcode: 0, not supported')

    # /* sanity check */
    if (data != top or size != 0):
        ## /* XXX: error("delta replay has gone wild"); */
        raise RuntimeError('Did not extract the number of bytes we expected'
            ' we were left with %d bytes in "size", and top - data = %d'
            % (size, <int>(top - data)))

    # *dst_size = out - dst_buf;
    assert (out - dst_buf) == PyString_GET_SIZE(result)
    return result