/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2008, 2009, 2010 Canonical Ltd
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
2
#
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
3
# This program is free software; you can redistribute it and/or modify
3735.36.4 by John Arbash Meinel
Fix the GPL and copyright statements in the pyrex files
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
7
#
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
12
#
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
3735.36.4 by John Arbash Meinel
Fix the GPL and copyright statements in the pyrex files
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
16
17
"""Compiled extensions for doing compression."""
18
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
19
#python2.4 support
20
cdef extern from "python-compat.h":
4265.1.3 by John Arbash Meinel
restore the old Py_ssize_t import in the pyrex files.
21
    pass
22
23
24
cdef extern from "Python.h":
5361.2.5 by John Arbash Meinel
Pyrex doesn't allow sizeof(class), so we have to unroll it manually.
25
    ctypedef struct PyObject:
26
        pass
4265.1.1 by John Arbash Meinel
Merge the a couple rev older brisbane-core into bzr.dev, most things are resolve in favor of bzr.dev
27
    ctypedef int Py_ssize_t # Required for older pyrex versions
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
28
    int PyString_CheckExact(object)
29
    char * PyString_AS_STRING(object)
30
    Py_ssize_t PyString_GET_SIZE(object)
31
    object PyString_FromStringAndSize(char *, Py_ssize_t)
32
33
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
34
cdef extern from *:
35
    ctypedef unsigned long size_t
4788.2.2 by John Arbash Meinel
Stop holding the gil while extracting data.
36
    void * malloc(size_t) nogil
37
    void * realloc(void *, size_t) nogil
38
    void free(void *) nogil
39
    void memcpy(void *, void *, size_t) nogil
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
40
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
41
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
42
cdef extern from "delta.h":
0.23.42 by John Arbash Meinel
Change the code around again.
43
    struct source_info:
44
        void *buf
45
        unsigned long size
46
        unsigned long agg_offset
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
47
    struct delta_index:
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
48
        pass
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
49
    delta_index * create_delta_index(source_info *src, delta_index *old) nogil
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
50
    delta_index * create_delta_index_from_delta(source_info *delta,
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
51
                                                delta_index *old) nogil
52
    void free_delta_index(delta_index *index) nogil
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
53
    void *create_delta(delta_index *indexes,
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
54
             void *buf, unsigned long bufsize,
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
55
             unsigned long *delta_size, unsigned long max_delta_size) nogil
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
56
    unsigned long get_delta_hdr_size(unsigned char **datap,
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
57
                                     unsigned char *top) nogil
5361.2.3 by John Arbash Meinel
Add a __sizeof__ member for DeltaIndex.
58
    unsigned long sizeof_delta_index(delta_index *index)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
59
    Py_ssize_t DELTA_SIZE_MIN
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
60
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
61
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
62
cdef void *safe_malloc(size_t count) except NULL:
    """Allocate ``count`` bytes with malloc().

    :param count: Number of bytes to allocate.
    :return: The pointer returned by malloc().
    :raises MemoryError: if malloc() returned NULL.
    """
    cdef void *mem
    mem = malloc(count)
    if mem != NULL:
        return mem
    raise MemoryError('Failed to allocate %d bytes of memory' % (count,))
68
69
70
cdef void *safe_realloc(void * old, size_t count) except NULL:
    """Resize the allocation ``old`` to ``count`` bytes with realloc().

    :param old: The pointer to hand to realloc().
    :param count: The requested new size in bytes.
    :return: The pointer returned by realloc().
    :raises MemoryError: if realloc() returned NULL.
    """
    cdef void *mem
    mem = realloc(old, count)
    if mem != NULL:
        return mem
    raise MemoryError('Failed to reallocate to %d bytes of memory'
                      % (count,))
77
78
79
cdef int safe_free(void **val) except -1:
    """Free the allocation that ``val`` points at and reset it to NULL.

    :param val: Address of the pointer to release; must not be NULL itself.
        If the pointer it holds is already NULL nothing is freed.
    :return: 0 (a -1 return is reserved for signalling an exception).
    """
    assert val != NULL
    if val[0] == NULL:
        return 0
    free(val[0])
    # Clear the caller's pointer so a second call is a harmless no-op.
    val[0] = NULL
    return 0
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
84
0.23.17 by John Arbash Meinel
Create a wrapper function, so that lsprof will properly attribute time spent.
85
def make_delta_index(source):
    """Build a DeltaIndex whose initial source text is ``source``.

    This is a plain function wrapper around the DeltaIndex constructor.
    """
    index = DeltaIndex(source)
    return index
87
88
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
89
cdef class DeltaIndex:
    """Python wrapper around the C ``delta_index`` structure from delta.h.

    The Python strings that have been added are kept alive in ``_sources``;
    ``_source_infos`` is a parallel C array whose entries point into those
    strings. The index over the first source is built lazily, the first time
    it is actually needed.
    """

    # We need Pyrex 0.9.8+ to understand a 'list' definition, and this object
    # isn't performance critical
    # cdef readonly list _sources
    cdef readonly object _sources
    cdef source_info *_source_infos
    cdef delta_index *_index
    cdef public unsigned long _source_offset
    cdef readonly unsigned int _max_num_sources

    def __init__(self, source=None):
        """Create the index, optionally seeding it with a first source.

        :param source: If not None, a byte string passed to
            ``add_source(source, 0)``.
        """
        # __init__ can legally be invoked more than once on the same object;
        # release whatever a previous call allocated so it is not leaked.
        if self._index != NULL:
            free_delta_index(self._index)
            self._index = NULL
        safe_free(<void **>&self._source_infos)

        self._sources = []
        self._max_num_sources = 65000
        self._source_infos = <source_info *>safe_malloc(sizeof(source_info)
                                                        * self._max_num_sources)
        self._source_offset = 0

        if source is not None:
            self.add_source(source, 0)

    def __sizeof__(self):
        # We want to track the _source_infos allocations, but the referenced
        # void* are actually tracked in _sources itself.
        # XXX: Cython is capable of doing sizeof(class) and returning the size
        #      of the underlying struct. Pyrex (<= 0.9.9) refuses, so we need
        #      to do it manually. *sigh* Note that we might get it wrong
        #      because of alignment issues.
        cdef Py_ssize_t size
        # PyObject start, vtable *, 3 object pointers, 2 C ints
        size = ((sizeof(PyObject) + sizeof(void*) + 3*sizeof(PyObject*)
                 + sizeof(unsigned long)
                 + sizeof(unsigned int))
                + (sizeof(source_info) * self._max_num_sources)
                + sizeof_delta_index(self._index))
        return size

    def __repr__(self):
        return '%s(%d, %d)' % (self.__class__.__name__,
            len(self._sources), self._source_offset)

    def __dealloc__(self):
        if self._index != NULL:
            free_delta_index(self._index)
            self._index = NULL
        safe_free(<void **>&self._source_infos)

    def _has_index(self):
        """Return True if the C index has been built (it is built lazily)."""
        return (self._index != NULL)

    def add_delta_source(self, delta, unadded_bytes):
        """Add a new delta to the source texts.

        :param delta: The text of the delta, this must be a byte string.
        :param unadded_bytes: Number of bytes that were added to the source
            that were not indexed.
        :raises TypeError: if delta is not an exact str.
        :raises AssertionError: if delta is empty or there is no existing
            index to extend.
        :raises MemoryError: if the C layer could not build the new index.
        """
        cdef char *c_delta
        cdef Py_ssize_t c_delta_size
        cdef delta_index *index
        cdef unsigned int source_location
        cdef source_info *src

        if not PyString_CheckExact(delta):
            raise TypeError('delta is not a str')

        source_location = len(self._sources)
        if source_location >= self._max_num_sources:
            self._expand_sources()
        if source_location != 0 and self._index == NULL:
            # The first source is indexed lazily (see add_source); build that
            # index now so there is something for the delta to extend.
            self._populate_first_index()
        c_delta = PyString_AS_STRING(delta)
        c_delta_size = PyString_GET_SIZE(delta)
        src = self._source_infos + source_location
        src.buf = c_delta
        src.size = c_delta_size
        src.agg_offset = self._source_offset + unadded_bytes
        assert src.buf and src.size and self._index
        with nogil:
            index = create_delta_index_from_delta(src, self._index)
        if index == NULL:
            raise MemoryError
        # Only record the new source once indexing succeeded, so a failure
        # above leaves _sources and _source_offset untouched.
        self._sources.append(delta)
        self._source_offset = src.agg_offset + src.size
        if index != self._index:
            free_delta_index(self._index)
            self._index = index

    def add_source(self, source, unadded_bytes):
        """Add a new bit of source text to the delta indexes.

        :param source: The text in question, this must be a byte string
        :param unadded_bytes: Assume there are this many bytes that didn't get
            added between this source and the end of the previous source.
        :raises TypeError: if source is not an exact str.
        :raises AssertionError: if a source after the first one is empty.
        :raises MemoryError: if the C layer could not build the new index.
        """
        cdef char *c_source
        cdef Py_ssize_t c_source_size
        cdef delta_index *index
        cdef unsigned int source_location
        cdef source_info *src

        if not PyString_CheckExact(source):
            raise TypeError('source is not a str')

        source_location = len(self._sources)
        if source_location >= self._max_num_sources:
            self._expand_sources()
        if source_location != 0 and self._index == NULL:
            # We were lazy about populating the index, create it now
            self._populate_first_index()
        c_source = PyString_AS_STRING(source)
        c_source_size = PyString_GET_SIZE(source)
        src = self._source_infos + source_location
        src.buf = c_source
        src.size = c_source_size
        src.agg_offset = self._source_offset + unadded_bytes
        # We delay creating the index on the first insert
        if source_location != 0:
            assert src.size and src.buf
            with nogil:
                index = create_delta_index(src, self._index)
            if index == NULL:
                raise MemoryError
            if index != self._index:
                free_delta_index(self._index)
                self._index = index
        # Only record the new source once any indexing succeeded, so a failure
        # above leaves _sources and _source_offset untouched.
        self._sources.append(source)
        self._source_offset = src.agg_offset + src.size

    cdef _populate_first_index(self):
        """Build the index for the single source that was added lazily."""
        cdef delta_index *index
        if len(self._sources) != 1 or self._index != NULL:
            raise AssertionError('_populate_first_index should only be'
                ' called when we have a single source and no index yet')

        # We know that self._index is already NULL, so whatever
        # create_delta_index returns is fine unless there's a malloc failure
        assert self._source_infos[0].size and self._source_infos[0].buf
        with nogil:
            self._index = create_delta_index(&self._source_infos[0], NULL)
        if self._index == NULL:
            raise MemoryError

    cdef _expand_sources(self):
        """Growing the source table is not supported; always raises.

        The code that used to follow the raise (doubling _max_num_sources and
        realloc'ing _source_infos) could never run, so it has been dropped.
        """
        raise RuntimeError('if we move self._source_infos, then we need to'
                           ' change all of the index pointers as well.')

    def make_delta(self, target_bytes, max_delta_size=0):
        """Create a delta from the current source to the target bytes.

        :param target_bytes: The text to delta against the sources; must be a
            byte string.
        :param max_delta_size: Passed through to create_delta().
        :return: The delta as a byte string, or None if there are no sources
            yet or create_delta() returned NULL.
        :raises TypeError: if target_bytes is not an exact str.
        """
        cdef char *target
        cdef Py_ssize_t target_size
        cdef void * delta
        cdef unsigned long delta_size
        cdef unsigned long c_max_delta_size

        if self._index == NULL:
            if len(self._sources) == 0:
                return None
            # We were just lazy about generating the index
            self._populate_first_index()

        if not PyString_CheckExact(target_bytes):
            raise TypeError('target is not a str')

        target = PyString_AS_STRING(target_bytes)
        target_size = PyString_GET_SIZE(target_bytes)

        # TODO: inline some of create_delta so we at least don't have to double
        #       malloc, and can instead use PyString_FromStringAndSize, to
        #       allocate the bytes into the final string
        c_max_delta_size = max_delta_size
        with nogil:
            delta = create_delta(self._index,
                                 target, target_size,
                                 &delta_size, c_max_delta_size)
        result = None
        if delta != NULL:
            # Free the C buffer even if building the Python string raises.
            try:
                result = PyString_FromStringAndSize(<char *>delta, delta_size)
            finally:
                free(delta)
        return result
275
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
276
277
def make_delta(source_bytes, target_bytes):
    """Create a delta that turns source_bytes into target_bytes.

    Convenience wrapper: builds a one-off DeltaIndex over source_bytes and
    returns whatever its make_delta() method returns for target_bytes.
    """
    return DeltaIndex(source_bytes).make_delta(target_bytes)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
281
282
283
def apply_delta(source_bytes, delta_bytes):
    """Apply a delta generated by make_delta to source_bytes.

    :param source_bytes: The text the delta was computed against (exact str).
    :param delta_bytes: The delta to apply (exact str).
    :return: Whatever _apply_delta() returns for the two buffers.
    :raises TypeError: if either argument is not an exact str.
    :raises RuntimeError: if the delta is shorter than DELTA_SIZE_MIN.
    """
    cdef char *c_source
    cdef Py_ssize_t c_source_len
    cdef char *c_delta
    cdef Py_ssize_t c_delta_len

    if not PyString_CheckExact(source_bytes):
        raise TypeError('source is not a str')
    if not PyString_CheckExact(delta_bytes):
        raise TypeError('delta is not a str')

    c_delta = PyString_AS_STRING(delta_bytes)
    c_delta_len = PyString_GET_SIZE(delta_bytes)
    # Code taken from patch-delta.c, only brought here to give better error
    # handling, and to avoid double allocating memory
    if c_delta_len < DELTA_SIZE_MIN:
        # XXX: Invalid delta block
        raise RuntimeError('delta_size %d smaller than min delta size %d'
                           % (c_delta_len, DELTA_SIZE_MIN))

    c_source = PyString_AS_STRING(source_bytes)
    c_source_len = PyString_GET_SIZE(source_bytes)
    return _apply_delta(c_source, c_source_len, c_delta, c_delta_len)
306
307
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
308
cdef unsigned char *_decode_copy_instruction(unsigned char *bytes,
    unsigned char cmd, unsigned int *offset,
    unsigned int *length) nogil: # cannot_raise
    """Decode a copy instruction from the next few bytes.

    A copy instruction is a variable number of bytes, so we will parse the
    bytes we care about, and return the new position, as well as the offset and
    length referred to in the bytes.

    Bits 0x01..0x08 of cmd say which of the four offset bytes are present
    (least significant first) and bits 0x10..0x40 say which of the three
    length bytes are present. One byte is consumed per set bit; this function
    does no bounds checking, so the caller must make sure that many bytes are
    readable. A decoded length of 0 means 0x10000.

    :param bytes: Pointer to the start of bytes after cmd
    :param cmd: The command code
    :param offset: Out parameter, receives the decoded copy offset
    :param length: Out parameter, receives the decoded copy length
    :return: Pointer to the bytes just after the last decode byte
    """
    cdef unsigned int off, size, count
    off = 0
    size = 0
    count = 0
    # Each byte is widened to unsigned int *before* shifting: an unsigned char
    # is otherwise promoted to (signed) int, and shifting a value >= 0x80 left
    # by 24 would overflow into the sign bit.
    if (cmd & 0x01):
        off = bytes[count]
        count = count + 1
    if (cmd & 0x02):
        off = off | (<unsigned int>bytes[count] << 8)
        count = count + 1
    if (cmd & 0x04):
        off = off | (<unsigned int>bytes[count] << 16)
        count = count + 1
    if (cmd & 0x08):
        off = off | (<unsigned int>bytes[count] << 24)
        count = count + 1
    if (cmd & 0x10):
        size = bytes[count]
        count = count + 1
    if (cmd & 0x20):
        size = size | (<unsigned int>bytes[count] << 8)
        count = count + 1
    if (cmd & 0x40):
        size = size | (<unsigned int>bytes[count] << 16)
        count = count + 1
    if (size == 0):
        size = 0x10000
    offset[0] = off
    length[0] = size
    return bytes + count
351
352
3735.40.19 by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string.
353
cdef object _apply_delta(char *source, Py_ssize_t source_size,
                         char *delta, Py_ssize_t delta_size):
    """common functionality between apply_delta and apply_delta_to_source.

    :param source: Start of the bytes that copy instructions read from.
    :param source_size: Number of bytes of source that copies may reference.
    :param delta: Start of the delta (size header followed by instructions).
    :param delta_size: Number of bytes in delta.
    :return: A new string holding the reconstructed text.
    :raises ValueError: if a copy instruction references bytes outside the
        source or the result, if opcode 0 is found, or if an insert is longer
        than the remaining output.
    :raises RuntimeError: if an instruction runs past the end of the delta, or
        the instructions do not produce exactly the number of bytes promised
        by the header.
    """
    cdef unsigned char *data, *top
    cdef unsigned char *dst_buf, *out, cmd
    cdef Py_ssize_t size
    cdef unsigned int cp_off, cp_size
    cdef unsigned int operand_bytes
    cdef int failed

    data = <unsigned char *>delta
    top = data + delta_size

    # now the result size
    size = get_delta_hdr_size(&data, top)
    result = PyString_FromStringAndSize(NULL, size)
    dst_buf = <unsigned char*>PyString_AS_STRING(result)

    failed = 0
    with nogil:
        out = dst_buf
        while (data < top):
            cmd = data[0]
            data = data + 1
            if (cmd & 0x80):
                # Copy instruction
                # One operand byte follows for every bit set in the low 7
                # bits of cmd; make sure they are all inside the delta before
                # decoding, since the decoder does no bounds checking itself.
                operand_bytes = ((cmd & 0x01) + ((cmd >> 1) & 0x01)
                                 + ((cmd >> 2) & 0x01) + ((cmd >> 3) & 0x01)
                                 + ((cmd >> 4) & 0x01) + ((cmd >> 5) & 0x01)
                                 + ((cmd >> 6) & 0x01))
                if operand_bytes > <unsigned int>(top - data):
                    failed = 4
                    break
                data = _decode_copy_instruction(data, cmd, &cp_off, &cp_size)
                if (cp_off + cp_size < cp_size or
                    cp_off + cp_size > <unsigned int>source_size or
                    cp_size > <unsigned int>size):
                    failed = 1
                    break
                memcpy(out, source + cp_off, cp_size)
                out = out + cp_size
                size = size - cp_size
            else:
                # Insert instruction
                if cmd == 0:
                    # cmd == 0 is reserved for future encoding
                    # extensions. In the mean time we must fail when
                    # encountering them (might be data corruption).
                    failed = 2
                    break
                if cmd > size:
                    failed = 3
                    break
                # The cmd literal bytes must all lie inside the delta,
                # otherwise memcpy would read past the end of the buffer.
                if cmd > (top - data):
                    failed = 4
                    break
                memcpy(out, data, cmd)
                out = out + cmd
                data = data + cmd
                size = size - cmd
    if failed:
        if failed == 1:
            raise ValueError('Something wrong with:'
                ' cp_off = %s, cp_size = %s'
                ' source_size = %s, size = %s'
                % (cp_off, cp_size, source_size, size))
        elif failed == 2:
            raise ValueError('Got delta opcode: 0, not supported')
        elif failed == 3:
            raise ValueError('Insert instruction longer than remaining'
                ' bytes: %d > %d' % (cmd, size))
        elif failed == 4:
            # RuntimeError matches what the sanity check below used to report
            # for a truncated delta (after having already over-read).
            raise RuntimeError('Delta instruction 0x%02x runs past the end'
                ' of the delta, only %d bytes remain'
                % (cmd, <int>(top - data)))

    # sanity check
    if (data != top or size != 0):
        raise RuntimeError('Did not extract the number of bytes we expected'
            ' we were left with %d bytes in "size", and top - data = %d'
            % (size, <int>(top - data)))

    # *dst_size = out - dst_buf;
    if (out - dst_buf) != PyString_GET_SIZE(result):
        raise RuntimeError('Number of bytes extracted did not match the'
            ' size encoded in the delta header.')
    return result
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
426
427
3735.40.19 by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string.
428
def apply_delta_to_source(source, delta_start, delta_end):
    """Extract a delta from source bytes, and apply it.

    The delta is the slice source[delta_start:delta_end]; it is applied using
    the bytes before delta_start as the text that copy instructions refer to.

    :param source: Byte string holding both the source text and the delta.
    :param delta_start: Offset in source of the first byte of the delta.
    :param delta_end: Offset in source just past the last byte of the delta.
    :return: Whatever _apply_delta() returns for the two regions.
    :raises TypeError: if source is not an exact str.
    :raises ValueError: if the offsets do not describe a non-empty range
        inside source.
    """
    cdef char *c_source
    cdef Py_ssize_t c_source_size
    cdef char *c_delta
    cdef Py_ssize_t c_delta_size
    cdef Py_ssize_t c_delta_start, c_delta_end

    if not PyString_CheckExact(source):
        raise TypeError('source is not a str')
    c_source_size = PyString_GET_SIZE(source)
    c_delta_start = delta_start
    c_delta_end = delta_end
    if c_delta_start < 0:
        # These are raw offsets, not Python-style indexes: a negative start
        # would make the delta pointer point before the source buffer.
        raise ValueError('delta starts before source')
    if c_delta_start >= c_source_size:
        raise ValueError('delta starts after source')
    if c_delta_end > c_source_size:
        raise ValueError('delta ends after source')
    if c_delta_start >= c_delta_end:
        raise ValueError('delta starts after it ends')

    c_delta_size = c_delta_end - c_delta_start
    c_source = PyString_AS_STRING(source)
    c_delta = c_source + c_delta_start
    # We don't use source_size, because we know the delta should not refer to
    # any bytes after it starts
    return _apply_delta(c_source, c_delta_start, c_delta, c_delta_size)
454
455
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
456
def encode_base128_int(val):
    """Convert an integer into a 7-bit lsb encoding.

    Each output byte carries 7 bits of the value, least significant group
    first; every byte except the last has its high bit (0x80) set.

    :param val: The integer to encode; it is converted to a C unsigned int.
    :return: The encoded byte string.
    :raises ValueError: if the encoding would not fit in the local buffer.
    """
    cdef unsigned int remaining
    cdef Py_ssize_t used
    cdef unsigned char buf[8] # max size for 32-bit int is 5 bytes

    remaining = val
    used = 0
    while used < 8 and remaining >= 0x80:
        # Low 7 bits of the value plus the continuation flag.
        buf[used] = <unsigned char>((remaining & 0x7F) | 0x80)
        remaining = remaining >> 7
        used = used + 1
    if remaining >= 0x80 or used >= 8:
        raise ValueError('encode_base128_int overflowed the buffer')
    # Final byte: what is left is < 0x80, so the continuation flag is clear.
    buf[used] = <unsigned char>(remaining & 0xFF)
    used = used + 1
    return PyString_FromStringAndSize(<char *>buf, used)
474
475
476
def decode_base128_int(bytes):
    """Decode an integer from a 7-bit lsb encoding.

    :param bytes: A byte string that starts with an encoded integer; any
        bytes after the encoded integer are ignored.
    :return: A tuple (value, offset) where offset is the number of bytes of
        the input that made up the encoded integer.
    :raises TypeError: if bytes is not an exact str.
    :raises ValueError: if the input is empty, ends while the continuation
        bit (0x80) is still set, or encodes more than 32 bits.
    """
    cdef int offset
    cdef int val
    cdef unsigned int uval
    cdef int shift
    cdef Py_ssize_t num_low_bytes
    cdef unsigned char *c_bytes

    offset = 0
    uval = 0
    shift = 0
    if not PyString_CheckExact(bytes):
        raise TypeError('bytes is not a string')
    c_bytes = <unsigned char*>PyString_AS_STRING(bytes)
    # We take off 1, because we have to be able to decode the non-expanded byte
    num_low_bytes = PyString_GET_SIZE(bytes) - 1
    if num_low_bytes < 0:
        # Without this an empty string would decode to (0, 1), an offset past
        # the end of the input.
        raise ValueError('Data not properly formatted, cannot decode an'
                         ' integer from an empty string.')
    while (c_bytes[offset] & 0x80) and offset < num_low_bytes:
        if shift >= 28:
            # A continuation byte here means at least one more byte would be
            # shifted by 35 or more bits, which a 32-bit value cannot hold
            # (and shifting by >= the type width is undefined in C).
            raise ValueError('Data not properly formatted, the encoded'
                             ' integer does not fit in 32 bits.')
        # Accumulate as unsigned so no shift can overflow a signed int.
        uval = uval | (<unsigned int>(c_bytes[offset] & 0x7F) << shift)
        shift = shift + 7
        offset = offset + 1
    if c_bytes[offset] & 0x80:
        raise ValueError('Data not properly formatted, we ran out of'
                         ' bytes before 0x80 stopped being set.')
    uval = uval | (<unsigned int>c_bytes[offset] << shift)
    offset = offset + 1
    # Values that fit in a signed int are returned through an int (as
    # before); only values with the top bit set go back as unsigned.
    val = <int>uval
    if val < 0:
        return uval, offset
    return val, offset
506
507