bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 
4241.6.6
by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
 Groupcompress from brisbane-core.  | 
1  | 
# Copyright (C) 2009 Canonical Ltd
 | 
2  | 
#
 | 
|
3  | 
# This program is free software; you can redistribute it and/or modify
 | 
|
4  | 
# it under the terms of the GNU General Public License as published by
 | 
|
5  | 
# the Free Software Foundation; either version 2 of the License, or
 | 
|
6  | 
# (at your option) any later version.
 | 
|
7  | 
#
 | 
|
8  | 
# This program is distributed in the hope that it will be useful,
 | 
|
9  | 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|
10  | 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|
11  | 
# GNU General Public License for more details.
 | 
|
12  | 
#
 | 
|
13  | 
# You should have received a copy of the GNU General Public License
 | 
|
14  | 
# along with this program; if not, write to the Free Software
 | 
|
15  | 
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
|
16  | 
||
17  | 
"""Python version of compiled extensions for doing compression.
 | 
|
18  | 
||
19  | 
We separate the implementation from the groupcompress.py to avoid importing
 | 
|
20  | 
useless stuff.
 | 
|
21  | 
"""
 | 
|
22  | 
||
| 
3735.40.7
by John Arbash Meinel
 Move even more functionality into EquivalenceTable.  | 
23  | 
from bzrlib import osutils  | 
24  | 
||
25  | 
||
| 
3735.40.13
by John Arbash Meinel
 Rename EquivalenceTable to LinesDeltaIndex.  | 
26  | 
class LinesDeltaIndex(object):  | 
27  | 
"""This class indexes matches between strings.  | 
|
| 
4241.6.6
by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
 Groupcompress from brisbane-core.  | 
28  | 
|
29  | 
    :ivar lines: The 'static' lines that will be preserved between runs.
 | 
|
| 
3735.40.13
by John Arbash Meinel
 Rename EquivalenceTable to LinesDeltaIndex.  | 
30  | 
    :ivar _matching_lines: A dict of {line:[matching offsets]}
 | 
31  | 
    :ivar line_offsets: The byte offset for the end of each line, used to
 | 
|
32  | 
        quickly map between a matching line number and the byte location
 | 
|
33  | 
    :ivar endpoint: The total number of bytes in self.line_offsets
 | 
|
| 
4241.6.6
by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
 Groupcompress from brisbane-core.  | 
34  | 
    """
 | 
35  | 
||
36  | 
def __init__(self, lines):  | 
|
| 
3735.40.13
by John Arbash Meinel
 Rename EquivalenceTable to LinesDeltaIndex.  | 
37  | 
self.lines = []  | 
| 
3735.40.6
by John Arbash Meinel
 Move more information down into EquivalenceTable.  | 
38  | 
self.line_offsets = []  | 
| 
3735.40.13
by John Arbash Meinel
 Rename EquivalenceTable to LinesDeltaIndex.  | 
39  | 
self.endpoint = 0  | 
40  | 
self._matching_lines = {}  | 
|
41  | 
self.extend_lines(lines, [True]*len(lines))  | 
|
| 
4241.6.6
by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
 Groupcompress from brisbane-core.  | 
42  | 
|
43  | 
def _update_matching_lines(self, new_lines, index):  | 
|
44  | 
matches = self._matching_lines  | 
|
45  | 
start_idx = len(self.lines)  | 
|
| 
3735.40.15
by John Arbash Meinel
 Some cleanup passes over the LinesDeltaIndex code.  | 
46  | 
if len(new_lines) != len(index):  | 
47  | 
raise AssertionError('The number of lines to be indexed does'  | 
|
48  | 
' not match the index/don\'t index flags: %d != %d'  | 
|
49  | 
% (len(new_lines), len(index)))  | 
|
| 
4241.6.6
by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
 Groupcompress from brisbane-core.  | 
50  | 
for idx, do_index in enumerate(index):  | 
51  | 
if not do_index:  | 
|
52  | 
                continue
 | 
|
53  | 
matches.setdefault(new_lines[idx], []).append(start_idx + idx)  | 
|
54  | 
||
55  | 
def get_matches(self, line):  | 
|
56  | 
"""Return the lines which match the line in right."""  | 
|
57  | 
try:  | 
|
58  | 
return self._matching_lines[line]  | 
|
59  | 
except KeyError:  | 
|
60  | 
return None  | 
|
61  | 
||
| 
3735.40.15
by John Arbash Meinel
 Some cleanup passes over the LinesDeltaIndex code.  | 
62  | 
def _get_longest_match(self, lines, pos, locations):  | 
63  | 
"""Look at all matches for the current line, return the longest.  | 
|
64  | 
||
65  | 
        :param lines: The lines we are matching against
 | 
|
66  | 
        :param pos: The current location we care about
 | 
|
67  | 
        :param locations: A list of lines that matched the current location.
 | 
|
68  | 
            This may be None, but often we'll have already found matches for
 | 
|
69  | 
            this line.
 | 
|
70  | 
        :return: (start_in_self, start_in_lines, num_lines)
 | 
|
71  | 
            All values are the offset in the list (aka the line number)
 | 
|
72  | 
            If start_in_self is None, then we have no matches, and this line
 | 
|
73  | 
            should be inserted in the target.
 | 
|
74  | 
        """
 | 
|
| 
3735.40.5
by John Arbash Meinel
 Start adding permutation tests for _groupcompress_py and _groupcompress_pyx  | 
75  | 
range_start = pos  | 
76  | 
range_len = 0  | 
|
77  | 
copy_ends = None  | 
|
| 
3735.40.15
by John Arbash Meinel
 Some cleanup passes over the LinesDeltaIndex code.  | 
78  | 
max_pos = len(lines)  | 
| 
3735.40.5
by John Arbash Meinel
 Start adding permutation tests for _groupcompress_py and _groupcompress_pyx  | 
79  | 
while pos < max_pos:  | 
80  | 
if locations is None:  | 
|
| 
3735.40.15
by John Arbash Meinel
 Some cleanup passes over the LinesDeltaIndex code.  | 
81  | 
                # TODO: is try/except better than get(..., None)?
 | 
| 
3735.40.14
by John Arbash Meinel
 Get rid of the self._right_lines state. It doesn't matter anymore.  | 
82  | 
try:  | 
83  | 
locations = self._matching_lines[lines[pos]]  | 
|
84  | 
except KeyError:  | 
|
85  | 
locations = None  | 
|
| 
3735.40.5
by John Arbash Meinel
 Start adding permutation tests for _groupcompress_py and _groupcompress_pyx  | 
86  | 
if locations is None:  | 
| 
4241.6.6
by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
 Groupcompress from brisbane-core.  | 
87  | 
                # No more matches, just return whatever we have, but we know
 | 
88  | 
                # that this last position is not going to match anything
 | 
|
| 
3735.40.5
by John Arbash Meinel
 Start adding permutation tests for _groupcompress_py and _groupcompress_pyx  | 
89  | 
pos += 1  | 
90  | 
                break
 | 
|
91  | 
else:  | 
|
| 
3735.40.15
by John Arbash Meinel
 Some cleanup passes over the LinesDeltaIndex code.  | 
92  | 
                # We have a match
 | 
| 
3735.40.5
by John Arbash Meinel
 Start adding permutation tests for _groupcompress_py and _groupcompress_pyx  | 
93  | 
if copy_ends is None:  | 
| 
3735.40.15
by John Arbash Meinel
 Some cleanup passes over the LinesDeltaIndex code.  | 
94  | 
                    # This is the first match in a range
 | 
| 
3735.40.5
by John Arbash Meinel
 Start adding permutation tests for _groupcompress_py and _groupcompress_pyx  | 
95  | 
copy_ends = [loc + 1 for loc in locations]  | 
96  | 
range_len = 1  | 
|
97  | 
locations = None # Consumed  | 
|
98  | 
else:  | 
|
| 
3735.40.15
by John Arbash Meinel
 Some cleanup passes over the LinesDeltaIndex code.  | 
99  | 
                    # We have a match started, compare to see if any of the
 | 
100  | 
                    # current matches can be continued
 | 
|
| 
3735.40.5
by John Arbash Meinel
 Start adding permutation tests for _groupcompress_py and _groupcompress_pyx  | 
101  | 
next_locations = set(copy_ends).intersection(locations)  | 
| 
3735.40.15
by John Arbash Meinel
 Some cleanup passes over the LinesDeltaIndex code.  | 
102  | 
if next_locations:  | 
103  | 
                        # At least one of the regions continues to match
 | 
|
| 
3735.40.5
by John Arbash Meinel
 Start adding permutation tests for _groupcompress_py and _groupcompress_pyx  | 
104  | 
copy_ends = [loc + 1 for loc in next_locations]  | 
105  | 
range_len += 1  | 
|
106  | 
locations = None # Consumed  | 
|
107  | 
else:  | 
|
| 
3735.40.15
by John Arbash Meinel
 Some cleanup passes over the LinesDeltaIndex code.  | 
108  | 
                        # All current regions no longer match.
 | 
109  | 
                        # This line does still match something, just not at the
 | 
|
110  | 
                        # end of the previous matches. We will return locations
 | 
|
111  | 
                        # so that we can avoid another _matching_lines lookup.
 | 
|
| 
3735.40.5
by John Arbash Meinel
 Start adding permutation tests for _groupcompress_py and _groupcompress_pyx  | 
112  | 
                        break
 | 
113  | 
pos += 1  | 
|
114  | 
if copy_ends is None:  | 
|
| 
3735.40.15
by John Arbash Meinel
 Some cleanup passes over the LinesDeltaIndex code.  | 
115  | 
            # We have no matches, this is a pure insert
 | 
| 
3735.40.5
by John Arbash Meinel
 Start adding permutation tests for _groupcompress_py and _groupcompress_pyx  | 
116  | 
return None, pos, locations  | 
117  | 
return (((min(copy_ends) - range_len, range_start, range_len)),  | 
|
118  | 
pos, locations)  | 
|
119  | 
||
120  | 
def get_matching_blocks(self, lines, soft=False):  | 
|
121  | 
"""Return the ranges in lines which match self.lines.  | 
|
122  | 
||
123  | 
        :param lines: lines to compress
 | 
|
124  | 
        :return: A list of (old_start, new_start, length) tuples which reflect
 | 
|
125  | 
            a region in self.lines that is present in lines.  The last element
 | 
|
126  | 
            of the list is always (old_len, new_len, 0) to provide a end point
 | 
|
127  | 
            for generating instructions from the matching blocks list.
 | 
|
128  | 
        """
 | 
|
| 
3735.40.15
by John Arbash Meinel
 Some cleanup passes over the LinesDeltaIndex code.  | 
129  | 
        # In this code, we iterate over multiple _get_longest_match calls, to
 | 
130  | 
        # find the next longest copy, and possible insert regions. We then
 | 
|
131  | 
        # convert that to the simple matching_blocks representation, since
 | 
|
132  | 
        # otherwise inserting 10 lines in a row would show up as 10
 | 
|
133  | 
        # instructions.
 | 
|
| 
3735.40.5
by John Arbash Meinel
 Start adding permutation tests for _groupcompress_py and _groupcompress_pyx  | 
134  | 
result = []  | 
135  | 
pos = 0  | 
|
136  | 
locations = None  | 
|
137  | 
max_pos = len(lines)  | 
|
138  | 
result_append = result.append  | 
|
139  | 
min_match_bytes = 10  | 
|
140  | 
if soft:  | 
|
141  | 
min_match_bytes = 200  | 
|
142  | 
while pos < max_pos:  | 
|
| 
3735.40.14
by John Arbash Meinel
 Get rid of the self._right_lines state. It doesn't matter anymore.  | 
143  | 
block, pos, locations = self._get_longest_match(lines, pos,  | 
| 
3735.40.15
by John Arbash Meinel
 Some cleanup passes over the LinesDeltaIndex code.  | 
144  | 
locations)  | 
| 
3735.40.5
by John Arbash Meinel
 Start adding permutation tests for _groupcompress_py and _groupcompress_pyx  | 
145  | 
if block is not None:  | 
| 
3735.40.14
by John Arbash Meinel
 Get rid of the self._right_lines state. It doesn't matter anymore.  | 
146  | 
                # Check to see if we match fewer than min_match_bytes. As we
 | 
147  | 
                # will turn this into a pure 'insert', rather than a copy.
 | 
|
148  | 
                # block[-1] is the number of lines. A quick check says if we
 | 
|
149  | 
                # have more lines than min_match_bytes, then we know we have
 | 
|
150  | 
                # enough bytes.
 | 
|
| 
3735.40.5
by John Arbash Meinel
 Start adding permutation tests for _groupcompress_py and _groupcompress_pyx  | 
151  | 
if block[-1] < min_match_bytes:  | 
152  | 
                    # This block may be a 'short' block, check
 | 
|
153  | 
old_start, new_start, range_len = block  | 
|
154  | 
matched_bytes = sum(map(len,  | 
|
155  | 
lines[new_start:new_start + range_len]))  | 
|
156  | 
if matched_bytes < min_match_bytes:  | 
|
157  | 
block = None  | 
|
158  | 
if block is not None:  | 
|
159  | 
result_append(block)  | 
|
160  | 
result_append((len(self.lines), len(lines), 0))  | 
|
161  | 
return result  | 
|
162  | 
||
| 
4241.6.6
by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
 Groupcompress from brisbane-core.  | 
163  | 
def extend_lines(self, lines, index):  | 
164  | 
"""Add more lines to the left-lines list.  | 
|
165  | 
||
166  | 
        :param lines: A list of lines to add
 | 
|
167  | 
        :param index: A True/False for each node to define if it should be
 | 
|
168  | 
            indexed.
 | 
|
169  | 
        """
 | 
|
170  | 
self._update_matching_lines(lines, index)  | 
|
171  | 
self.lines.extend(lines)  | 
|
| 
3735.40.6
by John Arbash Meinel
 Move more information down into EquivalenceTable.  | 
172  | 
endpoint = self.endpoint  | 
173  | 
for line in lines:  | 
|
174  | 
endpoint += len(line)  | 
|
175  | 
self.line_offsets.append(endpoint)  | 
|
| 
3735.40.15
by John Arbash Meinel
 Some cleanup passes over the LinesDeltaIndex code.  | 
176  | 
if len(self.line_offsets) != len(self.lines):  | 
177  | 
raise AssertionError('Somehow the line offset indicator'  | 
|
178  | 
' got out of sync with the line counter.')  | 
|
| 
3735.40.6
by John Arbash Meinel
 Move more information down into EquivalenceTable.  | 
179  | 
self.endpoint = endpoint  | 
| 
4241.6.6
by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
 Groupcompress from brisbane-core.  | 
180  | 
|
| 
3735.40.7
by John Arbash Meinel
 Move even more functionality into EquivalenceTable.  | 
181  | 
def _flush_insert(self, start_linenum, end_linenum,  | 
182  | 
new_lines, out_lines, index_lines):  | 
|
183  | 
"""Add an 'insert' request to the data stream."""  | 
|
184  | 
bytes_to_insert = ''.join(new_lines[start_linenum:end_linenum])  | 
|
185  | 
insert_length = len(bytes_to_insert)  | 
|
186  | 
        # Each insert instruction is at most 127 bytes long
 | 
|
187  | 
for start_byte in xrange(0, insert_length, 127):  | 
|
188  | 
insert_count = min(insert_length - start_byte, 127)  | 
|
189  | 
out_lines.append(chr(insert_count))  | 
|
190  | 
            # Don't index the 'insert' instruction
 | 
|
191  | 
index_lines.append(False)  | 
|
192  | 
insert = bytes_to_insert[start_byte:start_byte+insert_count]  | 
|
193  | 
as_lines = osutils.split_lines(insert)  | 
|
194  | 
out_lines.extend(as_lines)  | 
|
195  | 
index_lines.extend([True]*len(as_lines))  | 
|
196  | 
||
197  | 
def _flush_copy(self, old_start_linenum, num_lines,  | 
|
198  | 
out_lines, index_lines):  | 
|
199  | 
if old_start_linenum == 0:  | 
|
200  | 
first_byte = 0  | 
|
201  | 
else:  | 
|
202  | 
first_byte = self.line_offsets[old_start_linenum - 1]  | 
|
203  | 
stop_byte = self.line_offsets[old_start_linenum + num_lines - 1]  | 
|
204  | 
num_bytes = stop_byte - first_byte  | 
|
205  | 
        # The data stream allows >64kB in a copy, but to match the compiled
 | 
|
206  | 
        # code, we will also limit it to a 64kB copy
 | 
|
207  | 
for start_byte in xrange(first_byte, stop_byte, 64*1024):  | 
|
208  | 
num_bytes = min(64*1024, stop_byte - first_byte)  | 
|
209  | 
copy_bytes = encode_copy_instruction(start_byte, num_bytes)  | 
|
210  | 
out_lines.append(copy_bytes)  | 
|
211  | 
index_lines.append(False)  | 
|
212  | 
||
| 
3735.40.11
by John Arbash Meinel
 Implement make_delta and apply_delta.  | 
213  | 
def make_delta(self, new_lines, bytes_length=None, soft=False):  | 
| 
3735.40.7
by John Arbash Meinel
 Move even more functionality into EquivalenceTable.  | 
214  | 
"""Compute the delta for this content versus the original content."""  | 
| 
3735.40.11
by John Arbash Meinel
 Implement make_delta and apply_delta.  | 
215  | 
if bytes_length is None:  | 
216  | 
bytes_length = sum(map(len, new_lines))  | 
|
217  | 
        # reserved for content type, content length
 | 
|
218  | 
out_lines = ['', '', encode_base128_int(bytes_length)]  | 
|
| 
3735.40.10
by John Arbash Meinel
 Merge in the new delta format code.  | 
219  | 
index_lines = [False, False, False]  | 
| 
3735.40.7
by John Arbash Meinel
 Move even more functionality into EquivalenceTable.  | 
220  | 
blocks = self.get_matching_blocks(new_lines, soft=soft)  | 
221  | 
current_line_num = 0  | 
|
222  | 
        # We either copy a range (while there are reusable lines) or we
 | 
|
223  | 
        # insert new lines. To find reusable lines we traverse
 | 
|
224  | 
for old_start, new_start, range_len in blocks:  | 
|
225  | 
if new_start != current_line_num:  | 
|
226  | 
                # non-matching region, insert the content
 | 
|
227  | 
self._flush_insert(current_line_num, new_start,  | 
|
228  | 
new_lines, out_lines, index_lines)  | 
|
229  | 
current_line_num = new_start + range_len  | 
|
230  | 
if range_len:  | 
|
231  | 
self._flush_copy(old_start, range_len, out_lines, index_lines)  | 
|
232  | 
return out_lines, index_lines  | 
|
233  | 
||
234  | 
||
| 
3735.40.11
by John Arbash Meinel
 Implement make_delta and apply_delta.  | 
235  | 
def encode_base128_int(val):  | 
236  | 
"""Convert an integer into a 7-bit lsb encoding."""  | 
|
237  | 
bytes = []  | 
|
238  | 
count = 0  | 
|
239  | 
while val >= 0x80:  | 
|
240  | 
bytes.append(chr((val | 0x80) & 0xFF))  | 
|
241  | 
val >>= 7  | 
|
242  | 
bytes.append(chr(val))  | 
|
243  | 
return ''.join(bytes)  | 
|
244  | 
||
245  | 
||
246  | 
def decode_base128_int(bytes):  | 
|
247  | 
"""Decode an integer from a 7-bit lsb encoding."""  | 
|
248  | 
offset = 0  | 
|
249  | 
val = 0  | 
|
250  | 
shift = 0  | 
|
251  | 
bval = ord(bytes[offset])  | 
|
252  | 
while bval >= 0x80:  | 
|
253  | 
val |= (bval & 0x7F) << shift  | 
|
254  | 
shift += 7  | 
|
255  | 
offset += 1  | 
|
256  | 
bval = ord(bytes[offset])  | 
|
257  | 
val |= bval << shift  | 
|
258  | 
offset += 1  | 
|
259  | 
return val, offset  | 
|
260  | 
||
261  | 
||
| 
3735.40.7
by John Arbash Meinel
 Move even more functionality into EquivalenceTable.  | 
262  | 
def encode_copy_instruction(offset, length):  | 
263  | 
"""Convert this offset into a control code and bytes."""  | 
|
264  | 
copy_command = 0x80  | 
|
265  | 
copy_bytes = [None]  | 
|
266  | 
||
267  | 
for copy_bit in (0x01, 0x02, 0x04, 0x08):  | 
|
268  | 
base_byte = offset & 0xff  | 
|
269  | 
if base_byte:  | 
|
270  | 
copy_command |= copy_bit  | 
|
271  | 
copy_bytes.append(chr(base_byte))  | 
|
272  | 
offset >>= 8  | 
|
273  | 
if length is None:  | 
|
274  | 
        # None is used by the test suite
 | 
|
275  | 
copy_bytes[0] = chr(copy_command)  | 
|
276  | 
return ''.join(copy_bytes)  | 
|
277  | 
if length > 0x10000:  | 
|
278  | 
raise ValueError("we don't emit copy records for lengths > 64KiB")  | 
|
279  | 
if length == 0:  | 
|
280  | 
raise ValueError("We cannot emit a copy of length 0")  | 
|
281  | 
if length != 0x10000:  | 
|
282  | 
        # A copy of length exactly 64*1024 == 0x10000 is sent as a length of 0,
 | 
|
283  | 
        # since that saves bytes for large chained copies
 | 
|
284  | 
for copy_bit in (0x10, 0x20):  | 
|
285  | 
base_byte = length & 0xff  | 
|
286  | 
if base_byte:  | 
|
287  | 
copy_command |= copy_bit  | 
|
288  | 
copy_bytes.append(chr(base_byte))  | 
|
289  | 
length >>= 8  | 
|
290  | 
copy_bytes[0] = chr(copy_command)  | 
|
291  | 
return ''.join(copy_bytes)  | 
|
292  | 
||
| 
4241.6.6
by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
 Groupcompress from brisbane-core.  | 
293  | 
|
| 
3735.40.11
by John Arbash Meinel
 Implement make_delta and apply_delta.  | 
294  | 
def decode_copy_instruction(bytes, cmd, pos):  | 
295  | 
"""Decode a copy instruction from the next few bytes.  | 
|
296  | 
||
297  | 
    A copy instruction is a variable number of bytes, so we will parse the
 | 
|
298  | 
    bytes we care about, and return the new position, as well as the offset and
 | 
|
299  | 
    length referred to in the bytes.
 | 
|
300  | 
||
301  | 
    :param bytes: A string of bytes
 | 
|
302  | 
    :param cmd: The command code
 | 
|
303  | 
    :param pos: The position in bytes right after the copy command
 | 
|
304  | 
    :return: (offset, length, newpos)
 | 
|
305  | 
        The offset of the copy start, the number of bytes to copy, and the
 | 
|
306  | 
        position after the last byte of the copy
 | 
|
307  | 
    """
 | 
|
308  | 
if cmd & 0x80 != 0x80:  | 
|
309  | 
raise ValueError('copy instructions must have bit 0x80 set')  | 
|
310  | 
offset = 0  | 
|
311  | 
length = 0  | 
|
312  | 
if (cmd & 0x01):  | 
|
313  | 
offset = ord(bytes[pos])  | 
|
314  | 
pos += 1  | 
|
315  | 
if (cmd & 0x02):  | 
|
316  | 
offset = offset | (ord(bytes[pos]) << 8)  | 
|
317  | 
pos += 1  | 
|
318  | 
if (cmd & 0x04):  | 
|
319  | 
offset = offset | (ord(bytes[pos]) << 16)  | 
|
320  | 
pos += 1  | 
|
321  | 
if (cmd & 0x08):  | 
|
322  | 
offset = offset | (ord(bytes[pos]) << 24)  | 
|
323  | 
pos += 1  | 
|
324  | 
if (cmd & 0x10):  | 
|
325  | 
length = ord(bytes[pos])  | 
|
326  | 
pos += 1  | 
|
327  | 
if (cmd & 0x20):  | 
|
328  | 
length = length | (ord(bytes[pos]) << 8)  | 
|
329  | 
pos += 1  | 
|
330  | 
if (cmd & 0x40):  | 
|
331  | 
length = length | (ord(bytes[pos]) << 16)  | 
|
332  | 
pos += 1  | 
|
333  | 
if length == 0:  | 
|
334  | 
length = 65536  | 
|
335  | 
return (offset, length, pos)  | 
|
336  | 
||
| 
3735.40.5
by John Arbash Meinel
 Start adding permutation tests for _groupcompress_py and _groupcompress_pyx  | 
337  | 
|
338  | 
def make_delta(source_bytes, target_bytes):  | 
|
339  | 
"""Create a delta from source to target."""  | 
|
| 
4241.6.6
by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
 Groupcompress from brisbane-core.  | 
340  | 
    # TODO: The checks below may not be a the right place yet.
 | 
| 
3735.40.11
by John Arbash Meinel
 Implement make_delta and apply_delta.  | 
341  | 
if type(source_bytes) is not str:  | 
| 
4241.6.6
by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
 Groupcompress from brisbane-core.  | 
342  | 
raise TypeError('source is not a str')  | 
| 
3735.40.11
by John Arbash Meinel
 Implement make_delta and apply_delta.  | 
343  | 
if type(target_bytes) is not str:  | 
| 
4241.6.6
by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
 Groupcompress from brisbane-core.  | 
344  | 
raise TypeError('target is not a str')  | 
| 
3735.40.13
by John Arbash Meinel
 Rename EquivalenceTable to LinesDeltaIndex.  | 
345  | 
line_locations = LinesDeltaIndex(osutils.split_lines(source_bytes))  | 
| 
3735.40.11
by John Arbash Meinel
 Implement make_delta and apply_delta.  | 
346  | 
delta, _ = line_locations.make_delta(osutils.split_lines(target_bytes),  | 
347  | 
bytes_length=len(target_bytes))  | 
|
348  | 
return ''.join(delta)  | 
|
| 
4241.6.6
by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
 Groupcompress from brisbane-core.  | 
349  | 
|
350  | 
||
351  | 
def apply_delta(basis, delta):  | 
|
352  | 
"""Apply delta to this object to become new_version_id."""  | 
|
| 
3735.40.11
by John Arbash Meinel
 Implement make_delta and apply_delta.  | 
353  | 
if type(basis) is not str:  | 
354  | 
raise TypeError('basis is not a str')  | 
|
355  | 
if type(delta) is not str:  | 
|
356  | 
raise TypeError('delta is not a str')  | 
|
357  | 
target_length, pos = decode_base128_int(delta)  | 
|
| 
4241.6.6
by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
 Groupcompress from brisbane-core.  | 
358  | 
lines = []  | 
| 
3735.40.11
by John Arbash Meinel
 Implement make_delta and apply_delta.  | 
359  | 
len_delta = len(delta)  | 
360  | 
while pos < len_delta:  | 
|
361  | 
cmd = ord(delta[pos])  | 
|
362  | 
pos += 1  | 
|
363  | 
if cmd & 0x80:  | 
|
364  | 
offset, length, pos = decode_copy_instruction(delta, cmd, pos)  | 
|
| 
3735.40.19
by John Arbash Meinel
 Implement apply_delta_to_source which doesn't have to malloc another string.  | 
365  | 
last = offset + length  | 
366  | 
if last > len(basis):  | 
|
367  | 
raise ValueError('data would copy bytes past the'  | 
|
368  | 
'end of source')  | 
|
369  | 
lines.append(basis[offset:last])  | 
|
| 
3735.40.11
by John Arbash Meinel
 Implement make_delta and apply_delta.  | 
370  | 
else: # Insert of 'cmd' bytes  | 
371  | 
if cmd == 0:  | 
|
372  | 
raise ValueError('Command == 0 not supported yet')  | 
|
373  | 
lines.append(delta[pos:pos+cmd])  | 
|
374  | 
pos += cmd  | 
|
375  | 
bytes = ''.join(lines)  | 
|
376  | 
if len(bytes) != target_length:  | 
|
377  | 
raise ValueError('Delta claimed to be %d long, but ended up'  | 
|
378  | 
' %d long' % (target_length, len(bytes)))  | 
|
379  | 
return bytes  | 
|
| 
3735.40.19
by John Arbash Meinel
 Implement apply_delta_to_source which doesn't have to malloc another string.  | 
380  | 
|
381  | 
||
382  | 
def apply_delta_to_source(source, delta_start, delta_end):  | 
|
383  | 
"""Extract a delta from source bytes, and apply it."""  | 
|
384  | 
source_size = len(source)  | 
|
385  | 
if delta_start >= source_size:  | 
|
386  | 
raise ValueError('delta starts after source')  | 
|
387  | 
if delta_end > source_size:  | 
|
388  | 
raise ValueError('delta ends after source')  | 
|
389  | 
if delta_start >= delta_end:  | 
|
390  | 
raise ValueError('delta starts after it ends')  | 
|
391  | 
delta_bytes = source[delta_start:delta_end]  | 
|
392  | 
return apply_delta(source, delta_bytes)  |