36
38
def add_copy(self, start_byte, end_byte):
    """Emit copy instructions covering the bytes [start_byte, end_byte).

    The range is cut into pieces of at most 64kB. Each piece is encoded
    with encode_copy_instruction() and appended to self.out_lines, and a
    matching False is appended to self.index_lines so the instruction
    itself is not indexed.

    :param start_byte: Offset of the first byte to copy.
    :param end_byte: Offset one past the last byte to copy.
    """
    # The data stream allows >64kB in a copy, but to match the compiled
    # code, we will also limit it to a 64kB copy
    max_copy = 64 * 1024
    for chunk_start in range(start_byte, end_byte, max_copy):
        # The final piece may be shorter than the 64kB limit.
        num_bytes = min(max_copy, end_byte - chunk_start)
        copy_bytes = encode_copy_instruction(chunk_start, num_bytes)
        self.out_lines.append(copy_bytes)
        self.index_lines.append(False)
48
50
if self.cur_insert_len > 127:
49
51
raise AssertionError('We cannot insert more than 127 bytes'
51
self.out_lines.append(bytes([self.cur_insert_len]))
53
self.out_lines.append(chr(self.cur_insert_len))
52
54
self.index_lines.append(False)
53
55
self.out_lines.extend(self.cur_insert_lines)
54
56
if self.cur_insert_len < self.min_len_to_index:
55
self.index_lines.extend([False] * len(self.cur_insert_lines))
57
self.index_lines.extend([False]*len(self.cur_insert_lines))
57
self.index_lines.extend([True] * len(self.cur_insert_lines))
59
self.index_lines.extend([True]*len(self.cur_insert_lines))
58
60
self.cur_insert_lines = []
59
61
self.cur_insert_len = 0
62
64
# Flush out anything pending
63
65
self._flush_insert()
64
66
line_len = len(line)
65
for start_index in range(0, line_len, 127):
67
for start_index in xrange(0, line_len, 127):
66
68
next_len = min(127, line_len - start_index)
67
self.out_lines.append(bytes([next_len]))
69
self.out_lines.append(chr(next_len))
68
70
self.index_lines.append(False)
69
self.out_lines.append(line[start_index:start_index + next_len])
71
self.out_lines.append(line[start_index:start_index+next_len])
70
72
# We don't index long lines, because we won't be able to match
71
73
# a line split across multiple inserts anyway
72
74
self.index_lines.append(False)
109
111
self.line_offsets = []
110
112
self.endpoint = 0
111
113
self._matching_lines = {}
112
self.extend_lines(lines, [True] * len(lines))
114
self.extend_lines(lines, [True]*len(lines))
114
116
def _update_matching_lines(self, new_lines, index):
115
117
matches = self._matching_lines
116
118
start_idx = len(self.lines)
117
119
if len(new_lines) != len(index):
118
120
raise AssertionError('The number of lines to be indexed does'
119
' not match the index/don\'t index flags: %d != %d'
120
% (len(new_lines), len(index)))
121
' not match the index/don\'t index flags: %d != %d'
122
% (len(new_lines), len(index)))
121
123
for idx, do_index in enumerate(index):
126
128
matches[line].add(start_idx + idx)
128
matches[line] = {start_idx + idx}
130
matches[line] = set([start_idx + idx])
130
132
def get_matches(self, line):
131
133
"""Return the lines which match the line in right."""
165
167
# This is the first match in a range
166
168
prev_locations = locations
168
locations = None # Consumed
170
locations = None # Consumed
170
172
# We have a match started, compare to see if any of the
171
173
# current matches can be continued
175
177
# At least one of the regions continues to match
176
178
prev_locations = set(next_locations)
178
locations = None # Consumed
180
locations = None # Consumed
180
182
# All current regions no longer match.
181
183
# This line does still match something, just not at the
221
223
if block[-1] < min_match_bytes:
222
224
# This block may be a 'short' block, check
223
225
old_start, new_start, range_len = block
224
matched_bytes = sum(map(len, lines[new_start:new_start + range_len]))
226
matched_bytes = sum(map(len,
227
lines[new_start:new_start + range_len]))
225
228
if matched_bytes < min_match_bytes:
227
230
if block is not None:
244
247
self.line_offsets.append(endpoint)
245
248
if len(self.line_offsets) != len(self.lines):
246
249
raise AssertionError('Somehow the line offset indicator'
247
' got out of sync with the line counter.')
250
' got out of sync with the line counter.')
248
251
self.endpoint = endpoint
250
253
def _flush_insert(self, start_linenum, end_linenum,
251
254
new_lines, out_lines, index_lines):
252
255
"""Add an 'insert' request to the data stream."""
253
bytes_to_insert = b''.join(new_lines[start_linenum:end_linenum])
256
bytes_to_insert = ''.join(new_lines[start_linenum:end_linenum])
254
257
insert_length = len(bytes_to_insert)
255
258
# Each insert instruction is at most 127 bytes long
256
for start_byte in range(0, insert_length, 127):
259
for start_byte in xrange(0, insert_length, 127):
257
260
insert_count = min(insert_length - start_byte, 127)
258
out_lines.append(bytes([insert_count]))
261
out_lines.append(chr(insert_count))
259
262
# Don't index the 'insert' instruction
260
263
index_lines.append(False)
261
insert = bytes_to_insert[start_byte:start_byte + insert_count]
264
insert = bytes_to_insert[start_byte:start_byte+insert_count]
262
265
as_lines = osutils.split_lines(insert)
263
266
out_lines.extend(as_lines)
264
index_lines.extend([True] * len(as_lines))
267
index_lines.extend([True]*len(as_lines))
266
269
def _flush_copy(self, old_start_linenum, num_lines,
267
270
out_lines, index_lines):
273
276
num_bytes = stop_byte - first_byte
274
277
# The data stream allows >64kB in a copy, but to match the compiled
275
278
# code, we will also limit it to a 64kB copy
276
for start_byte in range(first_byte, stop_byte, 64 * 1024):
277
num_bytes = min(64 * 1024, stop_byte - start_byte)
279
for start_byte in xrange(first_byte, stop_byte, 64*1024):
280
num_bytes = min(64*1024, stop_byte - start_byte)
278
281
copy_bytes = encode_copy_instruction(start_byte, num_bytes)
279
282
out_lines.append(copy_bytes)
280
283
index_lines.append(False)
282
def make_delta(self, new_lines, bytes_length, soft=False):
285
def make_delta(self, new_lines, bytes_length=None, soft=False):
283
286
"""Compute the delta for this content versus the original content."""
287
if bytes_length is None:
288
bytes_length = sum(map(len, new_lines))
284
289
# reserved for content type, content length
285
out_lines = [b'', b'', encode_base128_int(bytes_length)]
290
out_lines = ['', '', encode_base128_int(bytes_length)]
286
291
index_lines = [False, False, False]
287
292
output_handler = _OutputHandler(out_lines, index_lines,
288
293
self._MIN_MATCH_BYTES)
293
298
for old_start, new_start, range_len in blocks:
294
299
if new_start != current_line_num:
295
300
# non-matching region, insert the content
296
output_handler.add_insert(
297
new_lines[current_line_num:new_start])
301
output_handler.add_insert(new_lines[current_line_num:new_start])
298
302
current_line_num = new_start + range_len
300
304
# Convert the line based offsets into byte based offsets
310
314
def encode_base128_int(val):
311
315
"""Convert an integer into a 7-bit lsb encoding."""
314
318
while val >= 0x80:
315
data.append((val | 0x80) & 0xFF)
319
bytes.append(chr((val | 0x80) & 0xFF))
321
def decode_base128_int(data):
321
bytes.append(chr(val))
322
return ''.join(bytes)
325
def decode_base128_int(bytes):
322
326
"""Decode an integer from a 7-bit lsb encoding."""
330
bval = ord(bytes[offset])
327
331
while bval >= 0x80:
328
332
val |= (bval & 0x7F) << shift
335
bval = ord(bytes[offset])
332
336
val |= bval << shift
334
338
return val, offset
358
362
base_byte = length & 0xff
360
364
copy_command |= copy_bit
361
copy_bytes.append(bytes([base_byte]))
365
copy_bytes.append(chr(base_byte))
363
copy_bytes[0] = bytes([copy_command])
364
return b''.join(copy_bytes)
367
copy_bytes[0] = chr(copy_command)
368
return ''.join(copy_bytes)
367
371
def decode_copy_instruction(bytes, cmd, pos):
390
offset = ord(bytes[pos])
389
offset = offset | (bytes[pos] << 8)
393
offset = offset | (ord(bytes[pos]) << 8)
392
offset = offset | (bytes[pos] << 16)
396
offset = offset | (ord(bytes[pos]) << 16)
395
offset = offset | (bytes[pos] << 24)
399
offset = offset | (ord(bytes[pos]) << 24)
402
length = ord(bytes[pos])
401
length = length | (bytes[pos] << 8)
405
length = length | (ord(bytes[pos]) << 8)
404
length = length | (bytes[pos] << 16)
408
length = length | (ord(bytes[pos]) << 16)
411
415
def make_delta(source_bytes, target_bytes):
    """Create a delta from source to target.

    Both inputs are split into lines; a LinesDeltaIndex built from the
    source lines computes the delta against the target lines, and the
    resulting chunks are joined into a single bytestring.

    :param source_bytes: The basis content, as bytes.
    :param target_bytes: The content to encode against the basis, as bytes.
    :return: The delta as a single bytes object.
    :raises TypeError: If either argument is not a bytes instance.
    """
    # Validate before touching the index so bad input fails fast.
    if not isinstance(source_bytes, bytes):
        raise TypeError('source is not bytes')
    if not isinstance(target_bytes, bytes):
        raise TypeError('target is not bytes')
    line_locations = LinesDeltaIndex(osutils.split_lines(source_bytes))
    delta, _ = line_locations.make_delta(osutils.split_lines(target_bytes),
                                         bytes_length=len(target_bytes))
    return b''.join(delta)
423
427
def apply_delta(basis, delta):
424
428
"""Apply delta to this object to become new_version_id."""
425
if not isinstance(basis, bytes):
426
raise TypeError('basis is not bytes')
427
if not isinstance(delta, bytes):
428
raise TypeError('delta is not bytes')
429
if type(basis) is not str:
430
raise TypeError('basis is not a str')
431
if type(delta) is not str:
432
raise TypeError('delta is not a str')
429
433
target_length, pos = decode_base128_int(delta)
431
435
len_delta = len(delta)
432
436
while pos < len_delta:
437
cmd = ord(delta[pos])
436
440
offset, length, pos = decode_copy_instruction(delta, cmd, pos)
439
443
raise ValueError('data would copy bytes past the'
441
445
lines.append(basis[offset:last])
442
else: # Insert of 'cmd' bytes
446
else: # Insert of 'cmd' bytes
444
448
raise ValueError('Command == 0 not supported yet')
445
lines.append(delta[pos:pos + cmd])
449
lines.append(delta[pos:pos+cmd])
447
data = b''.join(lines)
448
if len(data) != target_length:
451
bytes = ''.join(lines)
452
if len(bytes) != target_length:
449
453
raise ValueError('Delta claimed to be %d long, but ended up'
450
454
' %d long' % (target_length, len(bytes)))
454
458
def apply_delta_to_source(source, delta_start, delta_end):