/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2008, 2009, 2010 Canonical Ltd
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
2
#
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
3
# This program is free software; you can redistribute it and/or modify
3735.36.4 by John Arbash Meinel
Fix the GPL and copyright statements in the pyrex files
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
7
#
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
12
#
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
3735.36.4 by John Arbash Meinel
Fix the GPL and copyright statements in the pyrex files
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
16
17
"""Compiled extensions for doing compression."""
18
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
19
#python2.4 support
20
cdef extern from "python-compat.h":
4265.1.3 by John Arbash Meinel
restore the old Py_ssize_t import in the pyrex files.
21
    pass
22
23
24
cdef extern from "Python.h":
5361.2.5 by John Arbash Meinel
Pyrex doesn't allow sizeof(class), so we have to unroll it manually.
25
    ctypedef struct PyObject:
26
        pass
4265.1.1 by John Arbash Meinel
Merge the a couple rev older brisbane-core into bzr.dev, most things are resolve in favor of bzr.dev
27
    ctypedef int Py_ssize_t # Required for older pyrex versions
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
28
    int PyString_CheckExact(object)
29
    char * PyString_AS_STRING(object)
30
    Py_ssize_t PyString_GET_SIZE(object)
31
    object PyString_FromStringAndSize(char *, Py_ssize_t)
32
33
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
34
cdef extern from *:
35
    ctypedef unsigned long size_t
4788.2.2 by John Arbash Meinel
Stop holding the gil while extracting data.
36
    void * malloc(size_t) nogil
37
    void * realloc(void *, size_t) nogil
38
    void free(void *) nogil
39
    void memcpy(void *, void *, size_t) nogil
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
40
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
41
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
42
cdef extern from "delta.h":
0.23.42 by John Arbash Meinel
Change the code around again.
43
    struct source_info:
44
        void *buf
45
        unsigned long size
46
        unsigned long agg_offset
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
47
    struct delta_index:
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
48
        pass
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
49
    delta_index * create_delta_index(source_info *src, delta_index *old) nogil
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
50
    delta_index * create_delta_index_from_delta(source_info *delta,
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
51
                                                delta_index *old) nogil
52
    void free_delta_index(delta_index *index) nogil
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
53
    void *create_delta(delta_index *indexes,
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
54
             void *buf, unsigned long bufsize,
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
55
             unsigned long *delta_size, unsigned long max_delta_size) nogil
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
56
    unsigned long get_delta_hdr_size(unsigned char **datap,
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
57
                                     unsigned char *top) nogil
5361.2.3 by John Arbash Meinel
Add a __sizeof__ member for DeltaIndex.
58
    unsigned long sizeof_delta_index(delta_index *index)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
59
    Py_ssize_t DELTA_SIZE_MIN
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
60
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
61
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
62
cdef void *safe_malloc(size_t count) except NULL:
    """Allocate ``count`` bytes with malloc(), raising on failure.

    :param count: Number of bytes to request from malloc().
    :return: The pointer malloc() returned. A NULL return always comes with
        a MemoryError set, which is what the ``except NULL`` clause signals
        to callers.
    """
    cdef void *allocated
    allocated = malloc(count)
    if allocated != NULL:
        return allocated
    raise MemoryError('Failed to allocate %d bytes of memory' % (count,))
68
69
70
cdef void *safe_realloc(void * old, size_t count) except NULL:
    """Resize the allocation ``old`` to ``count`` bytes via realloc().

    :param old: Pointer handed to realloc() as the block to resize.
    :param count: New size in bytes.
    :return: The pointer realloc() returned. A NULL return always comes with
        a MemoryError set.
    """
    cdef void *resized
    resized = realloc(old, count)
    if resized != NULL:
        return resized
    raise MemoryError('Failed to reallocate to %d bytes of memory'
                      % (count,))
77
78
79
cdef int safe_free(void **val) except -1:
    """Free the allocation ``val`` points at, then reset it to NULL.

    :param val: Address of the pointer to release; must not itself be NULL.
        If the pointer it holds is already NULL nothing is freed.
    :return: 0 on success; -1 is reserved for a raised exception (the
        assertion below).
    """
    assert val != NULL
    if val[0] == NULL:
        # Nothing allocated, nothing to release.
        return 0
    free(val[0])
    # Clear the caller's pointer so a second call is harmless.
    val[0] = NULL
    return 0
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
84
0.23.17 by John Arbash Meinel
Create a wrapper function, so that lsprof will properly attribute time spent.
85
def make_delta_index(source):
    """Build and return a DeltaIndex for ``source``.

    This is a plain function wrapper around the DeltaIndex constructor.

    :param source: Passed straight through as DeltaIndex's ``source``
        argument.
    :return: The newly constructed DeltaIndex.
    """
    new_index = DeltaIndex(source)
    return new_index
87
88
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
89
cdef class DeltaIndex:
    """Wrapper around a C ``delta_index`` built from byte-string sources.

    Each added string is kept in ``_sources`` and described by one
    ``source_info`` record in ``_source_infos``; the record's ``buf`` points
    straight at the string's buffer, so the strings must stay alive for as
    long as the index does.

    ``_source_offset`` is the running total of source sizes plus any
    ``unadded_bytes`` reported by callers. ``_max_num_sources`` is the fixed
    capacity of ``_source_infos``.
    """

    # We need Pyrex 0.9.8+ to understand a 'list' definition, and this object
    # isn't performance critical
    # cdef readonly list _sources
    cdef readonly object _sources
    cdef source_info *_source_infos
    cdef delta_index *_index
    cdef public unsigned long _source_offset
    cdef readonly unsigned int _max_num_sources

    def __init__(self, source=None):
        """Create an empty index, optionally seeded with ``source``.

        :param source: If not None, added via ``add_source(source, 0)``.
        :raises MemoryError: if the ``source_info`` array cannot be allocated.
        """
        # __init__ can be invoked again on a live object. Release whatever a
        # previous run allocated; otherwise the old array leaks and the old
        # index keeps referring to sources we are about to forget.
        if self._index != NULL:
            free_delta_index(self._index)
            self._index = NULL
        safe_free(<void **>&self._source_infos)

        self._sources = []
        self._max_num_sources = 65000
        self._source_infos = <source_info *>safe_malloc(sizeof(source_info)
                                                        * self._max_num_sources)
        self._source_offset = 0

        if source is not None:
            self.add_source(source, 0)

    def __sizeof__(self):
        """Estimate the memory used by this object, in bytes."""
        # We want to track the _source_infos allocations, but the referenced
        # void* are actually tracked in _sources itself.
        # XXX: Cython is capable of doing sizeof(class) and returning the size
        #      of the underlying struct. Pyrex (<= 0.9.9) refuses, so we need
        #      to do it manually. *sigh* Note that we might get it wrong
        #      because of alignment issues.
        cdef Py_ssize_t size
        # PyObject start, vtable *, 3 object pointers, 2 C ints
        size = ((sizeof(PyObject) + sizeof(void*) + 3*sizeof(PyObject*)
                 + sizeof(unsigned long)
                 + sizeof(unsigned int))
                + (sizeof(source_info) * self._max_num_sources)
                + sizeof_delta_index(self._index))
        return size

    def __repr__(self):
        return '%s(%d, %d)' % (self.__class__.__name__,
            len(self._sources), self._source_offset)

    def __dealloc__(self):
        if self._index != NULL:
            free_delta_index(self._index)
            self._index = NULL
        safe_free(<void **>&self._source_infos)

    def _has_index(self):
        """Return True once a C-level index has actually been built."""
        return (self._index != NULL)

    def add_delta_source(self, delta, unadded_bytes):
        """Add a new delta to the source texts.

        :param delta: The text of the delta, this must be a byte string.
        :param unadded_bytes: Number of bytes that were added to the source
            that were not indexed.
        :raises TypeError: if ``delta`` is not exactly a str.
        """
        cdef char *c_delta
        cdef Py_ssize_t c_delta_size
        cdef delta_index *index
        cdef unsigned int source_location
        cdef source_info *src

        if not PyString_CheckExact(delta):
            raise TypeError('delta is not a str')

        source_location = len(self._sources)
        if source_location >= self._max_num_sources:
            self._expand_sources()
        self._sources.append(delta)
        c_delta = PyString_AS_STRING(delta)
        c_delta_size = PyString_GET_SIZE(delta)
        src = self._source_infos + source_location
        src.buf = c_delta
        src.size = c_delta_size
        src.agg_offset = self._source_offset + unadded_bytes
        with nogil:
            index = create_delta_index_from_delta(src, self._index)
        self._source_offset = src.agg_offset + src.size
        # A NULL result leaves the current index in place. Also never free
        # the index we are about to keep: if the C code handed back the same
        # pointer, freeing it would leave self._index dangling. This mirrors
        # the guard in add_source.
        if index != NULL and index != self._index:
            free_delta_index(self._index)
            self._index = index

    def add_source(self, source, unadded_bytes):
        """Add a new bit of source text to the delta indexes.

        :param source: The text in question, this must be a byte string
        :param unadded_bytes: Assume there are this many bytes that didn't get
            added between this source and the end of the previous source.
        :raises TypeError: if ``source`` is not exactly a str.
        :raises MemoryError: if the C code fails to build the index.
        """
        cdef char *c_source
        cdef Py_ssize_t c_source_size
        cdef delta_index *index
        cdef unsigned int source_location
        cdef source_info *src

        if not PyString_CheckExact(source):
            raise TypeError('source is not a str')

        source_location = len(self._sources)
        if source_location >= self._max_num_sources:
            self._expand_sources()
        if source_location != 0 and self._index == NULL:
            # We were lazy about populating the index, create it now
            self._populate_first_index()
        self._sources.append(source)
        c_source = PyString_AS_STRING(source)
        c_source_size = PyString_GET_SIZE(source)
        src = self._source_infos + source_location
        src.buf = c_source
        src.size = c_source_size

        src.agg_offset = self._source_offset + unadded_bytes
        self._source_offset = src.agg_offset + src.size
        # We delay creating the index on the first insert
        if source_location != 0:
            assert src.size and src.buf
            with nogil:
                index = create_delta_index(src, self._index)
            if index == NULL:
                raise MemoryError
            if index != self._index:
                free_delta_index(self._index)
                self._index = index

    cdef _populate_first_index(self):
        """Build the index for the single, lazily-added first source.

        :raises AssertionError: unless there is exactly one source and no
            index yet.
        :raises MemoryError: if create_delta_index returns NULL.
        """
        cdef delta_index *index
        if len(self._sources) != 1 or self._index != NULL:
            raise AssertionError('_populate_first_index should only be'
                ' called when we have a single source and no index yet')

        # We know that self._index is already NULL, so whatever
        # create_delta_index returns is fine unless there's a malloc failure
        assert self._source_infos[0].size and self._source_infos[0].buf
        with nogil:
            self._index = create_delta_index(&self._source_infos[0], NULL)
        if self._index == NULL:
            raise MemoryError

    cdef _expand_sources(self):
        """Refuse to grow ``_source_infos`` beyond ``_max_num_sources``.

        Growing would mean doubling ``_max_num_sources`` and reallocating
        ``_source_infos`` with safe_realloc, but that is not supported: as the
        error message says, moving the array would require changing all of
        the index pointers as well.

        :raises RuntimeError: always.
        """
        raise RuntimeError('if we move self._source_infos, then we need to'
                           ' change all of the index pointers as well.')

    def make_delta(self, target_bytes, max_delta_size=0):
        """Create a delta from the current source to the target bytes.

        :param target_bytes: The text to encode; must be exactly a str.
        :param max_delta_size: Passed through to the C ``create_delta`` as
            its ``max_delta_size`` argument.
        :return: The delta as a str, or None when there are no sources or
            when ``create_delta`` produced no delta.
        :raises TypeError: if ``target_bytes`` is not exactly a str.
        """
        cdef char *target
        cdef Py_ssize_t target_size
        cdef void * delta
        cdef unsigned long delta_size
        cdef unsigned long c_max_delta_size

        if self._index == NULL:
            if len(self._sources) == 0:
                return None
            # We were just lazy about generating the index
            self._populate_first_index()

        if not PyString_CheckExact(target_bytes):
            raise TypeError('target is not a str')

        target = PyString_AS_STRING(target_bytes)
        target_size = PyString_GET_SIZE(target_bytes)

        # TODO: inline some of create_delta so we at least don't have to double
        #       malloc, and can instead use PyString_FromStringAndSize, to
        #       allocate the bytes into the final string
        c_max_delta_size = max_delta_size
        with nogil:
            delta = create_delta(self._index,
                                 target, target_size,
                                 &delta_size, c_max_delta_size)
        result = None
        if delta:
            # Copy into a Python string, then release the C buffer.
            result = PyString_FromStringAndSize(<char *>delta, delta_size)
            free(delta)
        return result
272
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
273
274
def make_delta(source_bytes, target_bytes):
    """Create a delta between two texts in one call.

    Convenience wrapper: builds a throw-away DeltaIndex from ``source_bytes``
    and returns whatever its ``make_delta(target_bytes)`` returns.
    """
    return DeltaIndex(source_bytes).make_delta(target_bytes)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
278
279
280
def apply_delta(source_bytes, delta_bytes):
    """Apply a delta generated by make_delta to source_bytes.

    :param source_bytes: The source text; must be exactly a str.
    :param delta_bytes: The delta to apply; must be exactly a str.
    :return: Whatever ``_apply_delta`` builds from the two buffers.
    :raises TypeError: if either argument is not exactly a str.
    :raises RuntimeError: if the delta is shorter than DELTA_SIZE_MIN.
    """
    cdef char *c_source
    cdef Py_ssize_t c_source_len
    cdef char *c_delta
    cdef Py_ssize_t c_delta_len

    if not PyString_CheckExact(source_bytes):
        raise TypeError('source is not a str')
    if not PyString_CheckExact(delta_bytes):
        raise TypeError('delta is not a str')

    c_delta_len = PyString_GET_SIZE(delta_bytes)
    # Code taken from patch-delta.c, only brought here to give better error
    # handling, and to avoid double allocating memory
    if c_delta_len < DELTA_SIZE_MIN:
        # XXX: Invalid delta block
        raise RuntimeError('delta_size %d smaller than min delta size %d'
                           % (c_delta_len, DELTA_SIZE_MIN))

    c_delta = PyString_AS_STRING(delta_bytes)
    c_source = PyString_AS_STRING(source_bytes)
    c_source_len = PyString_GET_SIZE(source_bytes)
    return _apply_delta(c_source, c_source_len, c_delta, c_delta_len)
303
304
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
305
cdef unsigned char *_decode_copy_instruction(unsigned char *bytes,
    unsigned char cmd, unsigned int *offset,
    unsigned int *length) nogil: # cannot_raise
    """Decode a copy instruction from the next few bytes.

    A copy instruction is a variable number of bytes, so we will parse the
    bytes we care about, and return the new position, as well as the offset and
    length referred to in the bytes.

    Bits 0x01-0x08 of ``cmd`` each announce one offset byte (least
    significant first) and bits 0x10-0x40 each announce one length byte. A
    decoded length of 0 stands for 0x10000. The caller is responsible for
    making sure that many bytes are actually available at ``bytes``.

    :param bytes: Pointer to the start of bytes after cmd
    :param cmd: The command code
    :param offset: Receives the decoded copy offset
    :param length: Receives the decoded copy length
    :return: Pointer to the bytes just after the last decode byte
    """
    cdef unsigned int off, size, count
    off = 0
    size = 0
    count = 0
    # Every byte is widened to unsigned int *before* shifting. Left to the
    # default promotion it would become a signed int, and shifting a value
    # >= 0x80 left by 24 overflows that, which C leaves undefined.
    if (cmd & 0x01):
        off = bytes[count]
        count = count + 1
    if (cmd & 0x02):
        off = off | ((<unsigned int>bytes[count]) << 8)
        count = count + 1
    if (cmd & 0x04):
        off = off | ((<unsigned int>bytes[count]) << 16)
        count = count + 1
    if (cmd & 0x08):
        off = off | ((<unsigned int>bytes[count]) << 24)
        count = count + 1
    if (cmd & 0x10):
        size = bytes[count]
        count = count + 1
    if (cmd & 0x20):
        size = size | ((<unsigned int>bytes[count]) << 8)
        count = count + 1
    if (cmd & 0x40):
        size = size | ((<unsigned int>bytes[count]) << 16)
        count = count + 1
    if (size == 0):
        size = 0x10000
    offset[0] = off
    length[0] = size
    return bytes + count
348
349
3735.40.19 by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string.
350
cdef object _apply_delta(char *source, Py_ssize_t source_size,
                         char *delta, Py_ssize_t delta_size):
    """common functionality between apply_delta and apply_delta_to_source.

    Reads the expected result size from the delta header, allocates a string
    of that size and fills it by executing the copy/insert instructions that
    follow. The instruction loop runs without the GIL; failures are recorded
    in ``failed`` and turned into exceptions afterwards.

    :param source: Buffer that copy instructions read from.
    :param source_size: Number of bytes of ``source`` that may be copied.
    :param delta: Start of the delta (header followed by instructions).
    :param delta_size: Length of the delta in bytes.
    :return: The reconstructed text as a str.
    :raises ValueError: for a copy outside the source or larger than the
        remaining output, a truncated copy instruction, opcode 0, or an
        insert longer than the remaining output.
    :raises RuntimeError: for an insert that runs past the end of the delta,
        or when the instructions do not produce exactly the announced number
        of bytes.
    """
    cdef unsigned char *data, *top
    cdef unsigned char *dst_buf, *out, cmd
    cdef Py_ssize_t size
    cdef unsigned int cp_off, cp_size
    cdef unsigned int operand_bits, operand_count
    cdef int failed

    data = <unsigned char *>delta
    top = data + delta_size

    # now the result size
    size = get_delta_hdr_size(&data, top)
    result = PyString_FromStringAndSize(NULL, size)
    dst_buf = <unsigned char*>PyString_AS_STRING(result)

    failed = 0
    with nogil:
        out = dst_buf
        while (data < top):
            cmd = data[0]
            data = data + 1
            if (cmd & 0x80):
                # Copy instruction
                # _decode_copy_instruction consumes one byte per set bit in
                # the low 7 bits of cmd. Make sure they are all inside the
                # delta before decoding, so a truncated delta cannot make us
                # read past 'top'.
                operand_count = 0
                operand_bits = cmd & 0x7f
                while operand_bits:
                    operand_count = operand_count + (operand_bits & 1)
                    operand_bits = operand_bits >> 1
                if <Py_ssize_t>(top - data) < <Py_ssize_t>operand_count:
                    failed = 4
                    break
                data = _decode_copy_instruction(data, cmd, &cp_off, &cp_size)
                if (cp_off + cp_size < cp_size or
                    cp_off + cp_size > source_size or
                    cp_size > size):
                    failed = 1
                    break
                memcpy(out, source + cp_off, cp_size)
                out = out + cp_size
                size = size - cp_size
            else:
                # Insert instruction
                if cmd == 0:
                    # cmd == 0 is reserved for future encoding
                    # extensions. In the mean time we must fail when
                    # encountering them (might be data corruption).
                    failed = 2
                    break
                if cmd > size:
                    failed = 3
                    break
                if cmd > (top - data):
                    # The literal bytes would run past the end of the delta;
                    # copying them would read memory we do not own.
                    failed = 5
                    break
                memcpy(out, data, cmd)
                out = out + cmd
                data = data + cmd
                size = size - cmd
    if failed:
        if failed == 1:
            raise ValueError('Something wrong with:'
                ' cp_off = %s, cp_size = %s'
                ' source_size = %s, size = %s'
                % (cp_off, cp_size, source_size, size))
        elif failed == 2:
            raise ValueError('Got delta opcode: 0, not supported')
        elif failed == 3:
            raise ValueError('Insert instruction longer than remaining'
                ' bytes: %d > %d' % (cmd, size))
        elif failed == 4:
            raise ValueError('Copy instruction is truncated, only %d bytes'
                ' left in the delta' % (<int>(top - data),))
        elif failed == 5:
            # A truncated insert used to surface through the sanity check
            # below (after over-reading), so it stays a RuntimeError.
            raise RuntimeError('Insert instruction runs past the end of the'
                ' delta: %d > %d' % (cmd, <int>(top - data)))

    # sanity check
    if (data != top or size != 0):
        raise RuntimeError('Did not extract the number of bytes we expected'
            ' we were left with %d bytes in "size", and top - data = %d'
            % (size, <int>(top - data)))

    # *dst_size = out - dst_buf;
    if (out - dst_buf) != PyString_GET_SIZE(result):
        raise RuntimeError('Number of bytes extracted did not match the'
            ' size encoded in the delta header.')
    return result
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
423
424
3735.40.19 by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string.
425
def apply_delta_to_source(source, delta_start, delta_end):
    """Extract a delta from source bytes, and apply it.

    The delta is the slice ``source[delta_start:delta_end]``; it is applied
    against the part of ``source`` that precedes it.

    :param source: A str holding both the source text and the delta.
    :param delta_start: Offset in ``source`` where the delta begins.
    :param delta_end: Offset in ``source`` just past the end of the delta.
    :return: The text produced by ``_apply_delta``.
    :raises TypeError: if ``source`` is not exactly a str.
    :raises ValueError: if the delta range is negative, empty, inverted or
        not contained in ``source``.
    """
    cdef char *c_source
    cdef Py_ssize_t c_source_size
    cdef char *c_delta
    cdef Py_ssize_t c_delta_size
    cdef Py_ssize_t c_delta_start, c_delta_end

    if not PyString_CheckExact(source):
        raise TypeError('source is not a str')
    c_source_size = PyString_GET_SIZE(source)
    c_delta_start = delta_start
    c_delta_end = delta_end
    if c_delta_start < 0:
        # A negative start would pass every check below and then point the
        # delta in front of the source buffer.
        raise ValueError('delta starts before source')
    if c_delta_start >= c_source_size:
        raise ValueError('delta starts after source')
    if c_delta_end > c_source_size:
        raise ValueError('delta ends after source')
    if c_delta_start >= c_delta_end:
        raise ValueError('delta starts after it ends')

    c_delta_size = c_delta_end - c_delta_start
    c_source = PyString_AS_STRING(source)
    c_delta = c_source + c_delta_start
    # We don't use source_size, because we know the delta should not refer to
    # any bytes after it starts
    return _apply_delta(c_source, c_delta_start, c_delta, c_delta_size)
451
452
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
453
def encode_base128_int(val):
    """Convert an integer into a 7-bit lsb encoding.

    Each output byte carries 7 bits of the value, least significant group
    first; every byte except the last has its 0x80 bit set.

    :param val: The integer to encode; it is converted to a C unsigned int.
    :return: The encoded bytes as a str.
    :raises ValueError: if the encoding would not fit the local buffer.
    """
    cdef unsigned int remaining
    cdef Py_ssize_t used
    cdef unsigned char encoded[8] # max size for 32-bit int is 5 bytes

    remaining = val
    used = 0
    # Emit continuation bytes while more than 7 bits are left.
    while used < 8 and remaining >= 0x80:
        encoded[used] = <unsigned char>((remaining | 0x80) & 0xFF)
        remaining = remaining >> 7
        used = used + 1
    if used >= 8 or remaining >= 0x80:
        raise ValueError('encode_base128_int overflowed the buffer')
    # Final byte: the remaining (< 0x80) bits, high bit clear.
    encoded[used] = <unsigned char>(remaining & 0xFF)
    used = used + 1
    return PyString_FromStringAndSize(<char *>encoded, used)
471
472
473
def decode_base128_int(bytes):
    """Decode an integer from a 7-bit lsb encoding.

    :param bytes: A str that starts with the encoded integer; anything after
        the first byte without the 0x80 bit is ignored.
    :return: ``(value, offset)`` where ``offset`` is the number of bytes that
        were consumed.
    :raises TypeError: if ``bytes`` is not exactly a str.
    :raises ValueError: if ``bytes`` is empty, ends while the 0x80
        continuation bit is still set, or uses more than 5 bytes.
    """
    cdef int offset
    cdef int val
    cdef unsigned int uval
    cdef int shift
    cdef Py_ssize_t num_low_bytes
    cdef unsigned char *c_bytes

    if not PyString_CheckExact(bytes):
        raise TypeError('bytes is not a string')
    if PyString_GET_SIZE(bytes) == 0:
        # Without this the terminating NUL would be decoded as a value of 0
        # and an offset of 1 returned for a string with no bytes in it.
        raise ValueError('Data not properly formatted, there are no bytes'
                         ' to decode.')
    c_bytes = <unsigned char*>PyString_AS_STRING(bytes)
    # We take off 1, because we have to be able to decode the non-expanded byte
    num_low_bytes = PyString_GET_SIZE(bytes) - 1

    offset = 0
    shift = 0
    # Accumulate as unsigned so that setting the top bit is well defined.
    uval = 0
    while (c_bytes[offset] & 0x80) and offset < num_low_bytes:
        if shift > 28:
            # A sixth byte would need a shift past the width of the result.
            raise ValueError('Data not properly formatted, the encoded'
                             ' integer is longer than 5 bytes.')
        uval = uval | ((<unsigned int>(c_bytes[offset] & 0x7F)) << shift)
        shift = shift + 7
        offset = offset + 1
    if c_bytes[offset] & 0x80:
        raise ValueError('Data not properly formatted, we ran out of'
                         ' bytes before 0x80 stopped being set.')
    if shift > 28:
        raise ValueError('Data not properly formatted, the encoded'
                         ' integer is longer than 5 bytes.')
    uval = uval | ((<unsigned int>c_bytes[offset]) << shift)
    offset = offset + 1
    # Values that fit a C int are returned through 'val', larger ones as the
    # unsigned value.
    val = <int>uval
    if val < 0:
        return uval, offset
    return val, offset
503
504