43
43
def add_copy(self, start_byte, end_byte):
44
44
# The data stream allows >64kB in a copy, but to match the compiled
45
45
# code, we will also limit it to a 64kB copy
46
for start_byte in range(start_byte, end_byte, 64 * 1024):
47
num_bytes = min(64 * 1024, end_byte - start_byte)
46
for start_byte in range(start_byte, end_byte, 64*1024):
47
num_bytes = min(64*1024, end_byte - start_byte)
48
48
copy_bytes = encode_copy_instruction(start_byte, num_bytes)
49
49
self.out_lines.append(copy_bytes)
50
50
self.index_lines.append(False)
59
59
self.index_lines.append(False)
60
60
self.out_lines.extend(self.cur_insert_lines)
61
61
if self.cur_insert_len < self.min_len_to_index:
62
self.index_lines.extend([False] * len(self.cur_insert_lines))
62
self.index_lines.extend([False]*len(self.cur_insert_lines))
64
self.index_lines.extend([True] * len(self.cur_insert_lines))
64
self.index_lines.extend([True]*len(self.cur_insert_lines))
65
65
self.cur_insert_lines = []
66
66
self.cur_insert_len = 0
73
73
next_len = min(127, line_len - start_index)
74
74
self.out_lines.append(int2byte(next_len))
75
75
self.index_lines.append(False)
76
self.out_lines.append(line[start_index:start_index + next_len])
76
self.out_lines.append(line[start_index:start_index+next_len])
77
77
# We don't index long lines, because we won't be able to match
78
78
# a line split across multiple inserts anyway
79
79
self.index_lines.append(False)
116
116
self.line_offsets = []
117
117
self.endpoint = 0
118
118
self._matching_lines = {}
119
self.extend_lines(lines, [True] * len(lines))
119
self.extend_lines(lines, [True]*len(lines))
121
121
def _update_matching_lines(self, new_lines, index):
122
122
matches = self._matching_lines
123
123
start_idx = len(self.lines)
124
124
if len(new_lines) != len(index):
125
125
raise AssertionError('The number of lines to be indexed does'
126
' not match the index/don\'t index flags: %d != %d'
127
% (len(new_lines), len(index)))
126
' not match the index/don\'t index flags: %d != %d'
127
% (len(new_lines), len(index)))
128
128
for idx, do_index in enumerate(index):
172
172
# This is the first match in a range
173
173
prev_locations = locations
175
locations = None # Consumed
175
locations = None # Consumed
177
177
# We have a match started, compare to see if any of the
178
178
# current matches can be continued
182
182
# At least one of the regions continues to match
183
183
prev_locations = set(next_locations)
185
locations = None # Consumed
185
locations = None # Consumed
187
187
# All current regions no longer match.
188
188
# This line does still match something, just not at the
228
228
if block[-1] < min_match_bytes:
229
229
# This block may be a 'short' block, check
230
230
old_start, new_start, range_len = block
231
matched_bytes = sum(map(len, lines[new_start:new_start + range_len]))
231
matched_bytes = sum(map(len,
232
lines[new_start:new_start + range_len]))
232
233
if matched_bytes < min_match_bytes:
234
235
if block is not None:
251
252
self.line_offsets.append(endpoint)
252
253
if len(self.line_offsets) != len(self.lines):
253
254
raise AssertionError('Somehow the line offset indicator'
254
' got out of sync with the line counter.')
255
' got out of sync with the line counter.')
255
256
self.endpoint = endpoint
257
258
def _flush_insert(self, start_linenum, end_linenum,
265
266
out_lines.append(int2byte(insert_count))
266
267
# Don't index the 'insert' instruction
267
268
index_lines.append(False)
268
insert = bytes_to_insert[start_byte:start_byte + insert_count]
269
insert = bytes_to_insert[start_byte:start_byte+insert_count]
269
270
as_lines = osutils.split_lines(insert)
270
271
out_lines.extend(as_lines)
271
index_lines.extend([True] * len(as_lines))
272
index_lines.extend([True]*len(as_lines))
273
274
def _flush_copy(self, old_start_linenum, num_lines,
274
275
out_lines, index_lines):
280
281
num_bytes = stop_byte - first_byte
281
282
# The data stream allows >64kB in a copy, but to match the compiled
282
283
# code, we will also limit it to a 64kB copy
283
for start_byte in range(first_byte, stop_byte, 64 * 1024):
284
num_bytes = min(64 * 1024, stop_byte - start_byte)
284
for start_byte in range(first_byte, stop_byte, 64*1024):
285
num_bytes = min(64*1024, stop_byte - start_byte)
285
286
copy_bytes = encode_copy_instruction(start_byte, num_bytes)
286
287
out_lines.append(copy_bytes)
287
288
index_lines.append(False)
289
def make_delta(self, new_lines, bytes_length, soft=False):
290
def make_delta(self, new_lines, bytes_length=None, soft=False):
290
291
"""Compute the delta for this content versus the original content."""
292
if bytes_length is None:
293
bytes_length = sum(map(len, new_lines))
291
294
# reserved for content type, content length
292
295
out_lines = [b'', b'', encode_base128_int(bytes_length)]
293
296
index_lines = [False, False, False]
300
303
for old_start, new_start, range_len in blocks:
301
304
if new_start != current_line_num:
302
305
# non-matching region, insert the content
303
output_handler.add_insert(
304
new_lines[current_line_num:new_start])
306
output_handler.add_insert(new_lines[current_line_num:new_start])
305
307
current_line_num = new_start + range_len
307
309
# Convert the line based offsets into byte based offsets
446
448
raise ValueError('data would copy bytes past the'
448
450
lines.append(basis[offset:last])
449
else: # Insert of 'cmd' bytes
451
else: # Insert of 'cmd' bytes
451
453
raise ValueError('Command == 0 not supported yet')
452
lines.append(delta[pos:pos + cmd])
454
lines.append(delta[pos:pos+cmd])
454
456
data = b''.join(lines)
455
457
if len(data) != target_length: