/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
3735.36.4 by John Arbash Meinel
Fix the GPL and copyright statements in the pyrex files
1
# Copyright (C) 2009 Canonical Ltd
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
2
#
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
3
# This program is free software; you can redistribute it and/or modify
3735.36.4 by John Arbash Meinel
Fix the GPL and copyright statements in the pyrex files
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
7
#
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
12
#
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
3735.36.4 by John Arbash Meinel
Fix the GPL and copyright statements in the pyrex files
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
16
17
"""Compiled extensions for doing compression."""
18
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
19
#python2.4 support
20
cdef extern from "python-compat.h":
4265.1.3 by John Arbash Meinel
restore the old Py_ssize_t import in the pyrex files.
21
    pass
22
23
24
cdef extern from "Python.h":
4265.1.1 by John Arbash Meinel
Merge the a couple rev older brisbane-core into bzr.dev, most things are resolve in favor of bzr.dev
25
    ctypedef int Py_ssize_t # Required for older pyrex versions
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
26
    int PyString_CheckExact(object)
27
    char * PyString_AS_STRING(object)
28
    Py_ssize_t PyString_GET_SIZE(object)
29
    object PyString_FromStringAndSize(char *, Py_ssize_t)
30
31
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
32
cdef extern from *:
33
    ctypedef unsigned long size_t
34
    void * malloc(size_t)
0.18.23 by John Arbash Meinel
Now we can add more lines without having to rebuild the whole hash
35
    void * realloc(void *, size_t)
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
36
    void free(void *)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
37
    void memcpy(void *, void *, size_t)
38
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
39
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
40
cdef extern from "delta.h":
0.23.42 by John Arbash Meinel
Change the code around again.
41
    struct source_info:
42
        void *buf
43
        unsigned long size
44
        unsigned long agg_offset
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
45
    struct delta_index:
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
46
        pass
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
47
    delta_index * create_delta_index(source_info *src, delta_index *old)
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
48
    delta_index * create_delta_index_from_delta(source_info *delta,
49
                                                delta_index *old)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
50
    void free_delta_index(delta_index *index)
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
51
    void *create_delta(delta_index *indexes,
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
52
             void *buf, unsigned long bufsize,
53
             unsigned long *delta_size, unsigned long max_delta_size)
54
    unsigned long get_delta_hdr_size(unsigned char **datap,
55
                                     unsigned char *top)
56
    Py_ssize_t DELTA_SIZE_MIN
0.23.7 by John Arbash Meinel
Add a apply_delta2 function, just in case it matters.
57
    void *patch_delta(void *src_buf, unsigned long src_size,
58
                      void *delta_buf, unsigned long delta_size,
59
                      unsigned long *dst_size)
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
60
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
61
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
62
cdef void *safe_malloc(size_t count) except NULL:
63
    cdef void *result
64
    result = malloc(count)
65
    if result == NULL:
66
        raise MemoryError('Failed to allocate %d bytes of memory' % (count,))
67
    return result
68
69
70
cdef void *safe_realloc(void * old, size_t count) except NULL:
71
    cdef void *result
72
    result = realloc(old, count)
73
    if result == NULL:
74
        raise MemoryError('Failed to reallocate to %d bytes of memory'
75
                          % (count,))
76
    return result
77
78
79
cdef int safe_free(void **val) except -1:
80
    assert val != NULL
81
    if val[0] != NULL:
82
        free(val[0])
83
        val[0] = NULL
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
84
0.23.17 by John Arbash Meinel
Create a wrapper function, so that lsprof will properly attribute time spent.
85
def make_delta_index(source):
86
    return DeltaIndex(source)
87
88
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
89
cdef class DeltaIndex:
90
0.23.40 by John Arbash Meinel
Add a comment why we aren't using the list type for _sources
91
    # We need Pyrex 0.9.8+ to understand a 'list' definition, and this object
92
    # isn't performance critical
93
    # cdef readonly list _sources
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
94
    cdef readonly object _sources
0.23.42 by John Arbash Meinel
Change the code around again.
95
    cdef source_info *_source_infos
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
96
    cdef delta_index *_index
0.23.42 by John Arbash Meinel
Change the code around again.
97
    cdef readonly unsigned int _max_num_sources
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
98
    cdef public unsigned long _source_offset
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
99
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
100
    def __init__(self, source=None):
101
        self._sources = []
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
102
        self._index = NULL
0.23.53 by John Arbash Meinel
Remove the temporary adjustment for handling multiple formats of labels.
103
        self._max_num_sources = 65000
0.23.42 by John Arbash Meinel
Change the code around again.
104
        self._source_infos = <source_info *>safe_malloc(sizeof(source_info)
105
                                                        * self._max_num_sources)
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
106
        self._source_offset = 0
107
108
        if source is not None:
0.23.26 by John Arbash Meinel
We now start to make use of the ability to extend the delta index
109
            self.add_source(source, 0)
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
110
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
111
    def __repr__(self):
112
        return '%s(%d, %d)' % (self.__class__.__name__,
113
            len(self._sources), self._source_offset)
114
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
115
    def __dealloc__(self):
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
116
        if self._index != NULL:
117
            free_delta_index(self._index)
118
            self._index = NULL
0.23.42 by John Arbash Meinel
Change the code around again.
119
        safe_free(<void **>&self._source_infos)
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
120
4398.6.1 by John Arbash Meinel
Change groupcompress.DeltaIndex to be lazy about indexing the first source.
121
    def _has_index(self):
122
        return (self._index != NULL)
123
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
124
    def add_delta_source(self, delta, unadded_bytes):
125
        """Add a new delta to the source texts.
126
127
        :param delta: The text of the delta, this must be a byte string.
128
        :param unadded_bytes: Number of bytes that were added to the source
129
            that were not indexed.
130
        """
131
        cdef char *c_delta
132
        cdef Py_ssize_t c_delta_size
133
        cdef delta_index *index
134
        cdef unsigned int source_location
135
        cdef source_info *src
136
        cdef unsigned int num_indexes
137
138
        if not PyString_CheckExact(delta):
139
            raise TypeError('delta is not a str')
140
141
        source_location = len(self._sources)
142
        if source_location >= self._max_num_sources:
143
            self._expand_sources()
144
        self._sources.append(delta)
145
        c_delta = PyString_AS_STRING(delta)
146
        c_delta_size = PyString_GET_SIZE(delta)
147
        src = self._source_infos + source_location
148
        src.buf = c_delta
149
        src.size = c_delta_size
150
        src.agg_offset = self._source_offset + unadded_bytes
151
        index = create_delta_index_from_delta(src, self._index)
152
        self._source_offset = src.agg_offset + src.size
0.23.49 by John Arbash Meinel
When adding new entries to the delta index, use memcpy
153
        if index != NULL:
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
154
            free_delta_index(self._index)
155
            self._index = index
156
0.23.26 by John Arbash Meinel
We now start to make use of the ability to extend the delta index
157
    def add_source(self, source, unadded_bytes):
158
        """Add a new bit of source text to the delta indexes.
159
160
        :param source: The text in question, this must be a byte string
161
        :param unadded_bytes: Assume there are this many bytes that didn't get
162
            added between this source and the end of the previous source.
163
        """
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
164
        cdef char *c_source
165
        cdef Py_ssize_t c_source_size
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
166
        cdef delta_index *index
0.23.42 by John Arbash Meinel
Change the code around again.
167
        cdef unsigned int source_location
168
        cdef source_info *src
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
169
        cdef unsigned int num_indexes
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
170
171
        if not PyString_CheckExact(source):
172
            raise TypeError('source is not a str')
173
0.23.42 by John Arbash Meinel
Change the code around again.
174
        source_location = len(self._sources)
175
        if source_location >= self._max_num_sources:
176
            self._expand_sources()
4398.6.1 by John Arbash Meinel
Change groupcompress.DeltaIndex to be lazy about indexing the first source.
177
        if source_location != 0 and self._index == NULL:
178
            # We were lazy about populating the index, create it now
179
            self._populate_first_index()
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
180
        self._sources.append(source)
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
181
        c_source = PyString_AS_STRING(source)
182
        c_source_size = PyString_GET_SIZE(source)
0.23.42 by John Arbash Meinel
Change the code around again.
183
        src = self._source_infos + source_location
184
        src.buf = c_source
185
        src.size = c_source_size
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
186
0.23.42 by John Arbash Meinel
Change the code around again.
187
        src.agg_offset = self._source_offset + unadded_bytes
188
        self._source_offset = src.agg_offset + src.size
4398.6.1 by John Arbash Meinel
Change groupcompress.DeltaIndex to be lazy about indexing the first source.
189
        # We delay creating the index on the first insert
190
        if source_location != 0:
191
            index = create_delta_index(src, self._index)
192
            if index != NULL:
193
                free_delta_index(self._index)
194
                self._index = index
195
196
    cdef _populate_first_index(self):
197
        cdef delta_index *index
198
        if len(self._sources) != 1 or self._index != NULL:
199
            raise AssertionError('_populate_first_index should only be'
200
                ' called when we have a single source and no index yet')
201
202
        # We know that self._index is already NULL, so whatever
203
        # create_delta_index returns is fine
204
        self._index = create_delta_index(&self._source_infos[0], NULL)
205
        assert self._index != NULL
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
206
0.23.42 by John Arbash Meinel
Change the code around again.
207
    cdef _expand_sources(self):
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
208
        raise RuntimeError('if we move self._source_infos, then we need to'
209
                           ' change all of the index pointers as well.')
0.23.42 by John Arbash Meinel
Change the code around again.
210
        self._max_num_sources = self._max_num_sources * 2
211
        self._source_infos = <source_info *>safe_realloc(self._source_infos,
212
                                                sizeof(source_info)
213
                                                * self._max_num_sources)
214
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
215
    def make_delta(self, target_bytes, max_delta_size=0):
216
        """Create a delta from the current source to the target bytes."""
217
        cdef char *target
218
        cdef Py_ssize_t target_size
219
        cdef void * delta
220
        cdef unsigned long delta_size
221
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
222
        if self._index == NULL:
4398.6.1 by John Arbash Meinel
Change groupcompress.DeltaIndex to be lazy about indexing the first source.
223
            if len(self._sources) == 0:
224
                return None
225
            # We were just lazy about generating the index
226
            self._populate_first_index()
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
227
228
        if not PyString_CheckExact(target_bytes):
229
            raise TypeError('target is not a str')
230
231
        target = PyString_AS_STRING(target_bytes)
232
        target_size = PyString_GET_SIZE(target_bytes)
233
234
        # TODO: inline some of create_delta so we at least don't have to double
235
        #       malloc, and can instead use PyString_FromStringAndSize, to
236
        #       allocate the bytes into the final string
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
237
        delta = create_delta(self._index,
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
238
                             target, target_size,
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
239
                             &delta_size, max_delta_size)
240
        result = None
241
        if delta:
242
            result = PyString_FromStringAndSize(<char *>delta, delta_size)
243
            free(delta)
244
        return result
245
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
246
247
def make_delta(source_bytes, target_bytes):
0.23.42 by John Arbash Meinel
Change the code around again.
248
    """Create a delta, this is a wrapper around DeltaIndex.make_delta."""
249
    di = DeltaIndex(source_bytes)
250
    return di.make_delta(target_bytes)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
251
252
253
def apply_delta(source_bytes, delta_bytes):
254
    """Apply a delta generated by make_delta to source_bytes."""
255
    cdef char *source
256
    cdef Py_ssize_t source_size
257
    cdef char *delta
258
    cdef Py_ssize_t delta_size
259
260
    if not PyString_CheckExact(source_bytes):
261
        raise TypeError('source is not a str')
262
    if not PyString_CheckExact(delta_bytes):
263
        raise TypeError('delta is not a str')
264
    source = PyString_AS_STRING(source_bytes)
265
    source_size = PyString_GET_SIZE(source_bytes)
266
    delta = PyString_AS_STRING(delta_bytes)
267
    delta_size = PyString_GET_SIZE(delta_bytes)
268
    # Code taken from patch-delta.c, only brought here to give better error
269
    # handling, and to avoid double allocating memory
270
    if (delta_size < DELTA_SIZE_MIN):
271
        # XXX: Invalid delta block
0.23.33 by John Arbash Meinel
Fix a bug when handling multiple large-range copies.
272
        raise RuntimeError('delta_size %d smaller than min delta size %d'
273
                           % (delta_size, DELTA_SIZE_MIN))
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
274
3735.40.19 by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string.
275
    return _apply_delta(source, source_size, delta, delta_size)
276
277
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
278
cdef unsigned char *_decode_copy_instruction(unsigned char *bytes,
279
    unsigned char cmd, unsigned int *offset, unsigned int *length):
280
    """Decode a copy instruction from the next few bytes.
281
282
    A copy instruction is a variable number of bytes, so we will parse the
283
    bytes we care about, and return the new position, as well as the offset and
284
    length referred to in the bytes.
285
286
    :param bytes: Pointer to the start of bytes after cmd
287
    :param cmd: The command code
288
    :return: Pointer to the bytes just after the last decode byte
289
    """
290
    cdef unsigned int off, size, count
291
    off = 0
292
    size = 0
293
    count = 0
294
    if (cmd & 0x01):
295
        off = bytes[count]
296
        count = count + 1
297
    if (cmd & 0x02):
298
        off = off | (bytes[count] << 8)
299
        count = count + 1
300
    if (cmd & 0x04):
301
        off = off | (bytes[count] << 16)
302
        count = count + 1
303
    if (cmd & 0x08):
304
        off = off | (bytes[count] << 24)
305
        count = count + 1
306
    if (cmd & 0x10):
307
        size = bytes[count]
308
        count = count + 1
309
    if (cmd & 0x20):
310
        size = size | (bytes[count] << 8)
311
        count = count + 1
312
    if (cmd & 0x40):
313
        size = size | (bytes[count] << 16)
314
        count = count + 1
315
    if (size == 0):
316
        size = 0x10000
317
    offset[0] = off
318
    length[0] = size
319
    return bytes + count
320
321
3735.40.19 by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string.
322
cdef object _apply_delta(char *source, Py_ssize_t source_size,
323
                         char *delta, Py_ssize_t delta_size):
324
    """common functionality between apply_delta and apply_delta_to_source."""
325
    cdef unsigned char *data, *top
326
    cdef unsigned char *dst_buf, *out, cmd
327
    cdef Py_ssize_t size
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
328
    cdef unsigned int cp_off, cp_size
3735.40.19 by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string.
329
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
330
    data = <unsigned char *>delta
331
    top = data + delta_size
332
333
    # now the result size
334
    size = get_delta_hdr_size(&data, top)
335
    result = PyString_FromStringAndSize(NULL, size)
336
    dst_buf = <unsigned char*>PyString_AS_STRING(result)
337
338
    out = dst_buf
339
    while (data < top):
340
        cmd = data[0]
341
        data = data + 1
342
        if (cmd & 0x80):
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
343
            # Copy instruction
344
            data = _decode_copy_instruction(data, cmd, &cp_off, &cp_size)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
345
            if (cp_off + cp_size < cp_size or
346
                cp_off + cp_size > source_size or
347
                cp_size > size):
0.23.33 by John Arbash Meinel
Fix a bug when handling multiple large-range copies.
348
                raise RuntimeError('Something wrong with:'
349
                    ' cp_off = %s, cp_size = %s'
350
                    ' source_size = %s, size = %s'
351
                    % (cp_off, cp_size, source_size, size))
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
352
            memcpy(out, source + cp_off, cp_size)
353
            out = out + cp_size
0.24.1 by John Arbash Meinel
Make the groupcompress pyrex extension compatible with pyrex 0.9.6.4
354
            size = size - cp_size
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
355
        else:
356
            # Insert instruction
357
            if cmd == 0:
358
                # cmd == 0 is reserved for future encoding
359
                # extensions. In the mean time we must fail when
360
                # encountering them (might be data corruption).
361
                raise RuntimeError('Got delta opcode: 0, not supported')
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
362
            if (cmd > size):
0.23.33 by John Arbash Meinel
Fix a bug when handling multiple large-range copies.
363
                raise RuntimeError('Insert instruction longer than remaining'
364
                    ' bytes: %d > %d' % (cmd, size))
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
365
            memcpy(out, data, cmd)
366
            out = out + cmd
367
            data = data + cmd
0.24.1 by John Arbash Meinel
Make the groupcompress pyrex extension compatible with pyrex 0.9.6.4
368
            size = size - cmd
0.18.17 by John Arbash Meinel
We now build the appropriate hash table entries.
369
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
370
    # sanity check
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
371
    if (data != top or size != 0):
0.23.33 by John Arbash Meinel
Fix a bug when handling multiple large-range copies.
372
        raise RuntimeError('Did not extract the number of bytes we expected'
373
            ' we were left with %d bytes in "size", and top - data = %d'
374
            % (size, <int>(top - data)))
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
375
        return None
376
377
    # *dst_size = out - dst_buf;
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
378
    if (out - dst_buf) != PyString_GET_SIZE(result):
379
        raise RuntimeError('Number of bytes extracted did not match the'
380
            ' size encoded in the delta header.')
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
381
    return result
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
382
383
3735.40.19 by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string.
384
def apply_delta_to_source(source, delta_start, delta_end):
385
    """Extract a delta from source bytes, and apply it."""
386
    cdef char *c_source
387
    cdef Py_ssize_t c_source_size
388
    cdef char *c_delta
389
    cdef Py_ssize_t c_delta_size
390
    cdef Py_ssize_t c_delta_start, c_delta_end
391
392
    if not PyString_CheckExact(source):
393
        raise TypeError('source is not a str')
394
    c_source_size = PyString_GET_SIZE(source)
395
    c_delta_start = delta_start
396
    c_delta_end = delta_end
397
    if c_delta_start >= c_source_size:
398
        raise ValueError('delta starts after source')
399
    if c_delta_end > c_source_size:
400
        raise ValueError('delta ends after source')
401
    if c_delta_start >= c_delta_end:
402
        raise ValueError('delta starts after it ends')
403
404
    c_delta_size = c_delta_end - c_delta_start
405
    c_source = PyString_AS_STRING(source)
406
    c_delta = c_source + c_delta_start
407
    # We don't use source_size, because we know the delta should not refer to
408
    # any bytes after it starts
409
    return _apply_delta(c_source, c_delta_start, c_delta, c_delta_size)
410
411
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
412
def encode_base128_int(val):
413
    """Convert an integer into a 7-bit lsb encoding."""
414
    cdef unsigned int c_val
415
    cdef Py_ssize_t count
416
    cdef unsigned int num_bytes
417
    cdef unsigned char c_bytes[8] # max size for 32-bit int is 5 bytes
418
419
    c_val = val
420
    count = 0
421
    while c_val >= 0x80 and count < 8:
422
        c_bytes[count] = <unsigned char>((c_val | 0x80) & 0xFF)
423
        c_val = c_val >> 7
424
        count = count + 1
425
    if count >= 8 or c_val >= 0x80:
426
        raise ValueError('encode_base128_int overflowed the buffer')
427
    c_bytes[count] = <unsigned char>(c_val & 0xFF)
428
    count = count + 1
429
    return PyString_FromStringAndSize(<char *>c_bytes, count)
430
431
432
def decode_base128_int(bytes):
433
    """Decode an integer from a 7-bit lsb encoding."""
434
    cdef int offset
435
    cdef int val
436
    cdef unsigned int uval
437
    cdef int shift
438
    cdef Py_ssize_t num_low_bytes
439
    cdef unsigned char *c_bytes
440
441
    offset = 0
442
    val = 0
443
    shift = 0
444
    if not PyString_CheckExact(bytes):
445
        raise TypeError('bytes is not a string')
446
    c_bytes = <unsigned char*>PyString_AS_STRING(bytes)
447
    # We take off 1, because we have to be able to decode the non-expanded byte
448
    num_low_bytes = PyString_GET_SIZE(bytes) - 1
449
    while (c_bytes[offset] & 0x80) and offset < num_low_bytes:
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
450
        val = val | ((c_bytes[offset] & 0x7F) << shift)
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
451
        shift = shift + 7
452
        offset = offset + 1
453
    if c_bytes[offset] & 0x80:
454
        raise ValueError('Data not properly formatted, we ran out of'
455
                         ' bytes before 0x80 stopped being set.')
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
456
    val = val | (c_bytes[offset] << shift)
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
457
    offset = offset + 1
458
    if val < 0:
459
        uval = <unsigned int> val
460
        return uval, offset
461
    return val, offset
462
463