def add_copy(self, start_byte, end_byte):
    """Emit copy instructions covering the byte range start_byte..end_byte.

    The half-open range ``[start_byte, end_byte)`` is cut into pieces of at
    most 64kB. Each piece is encoded with ``encode_copy_instruction`` and
    appended to ``self.out_lines``, and a matching ``False`` flag is
    appended to ``self.index_lines``.

    :param start_byte: Offset of the first byte to copy.
    :param end_byte: Offset one past the last byte to copy.
    """
    # The data stream allows >64kB in a copy, but to match the compiled
    # code, we will also limit it to a 64kB copy
    max_copy = 64 * 1024
    for chunk_start in range(start_byte, end_byte, max_copy):
        num_bytes = min(max_copy, end_byte - chunk_start)
        copy_bytes = encode_copy_instruction(chunk_start, num_bytes)
        self.out_lines.append(copy_bytes)
        # The copy instruction itself is flagged as not-to-be-indexed.
        self.index_lines.append(False)
55
48
if self.cur_insert_len > 127:
56
49
raise AssertionError('We cannot insert more than 127 bytes'
58
self.out_lines.append(int2byte(self.cur_insert_len))
51
self.out_lines.append(chr(self.cur_insert_len))
59
52
self.index_lines.append(False)
60
53
self.out_lines.extend(self.cur_insert_lines)
61
54
if self.cur_insert_len < self.min_len_to_index:
62
self.index_lines.extend([False] * len(self.cur_insert_lines))
55
self.index_lines.extend([False]*len(self.cur_insert_lines))
64
self.index_lines.extend([True] * len(self.cur_insert_lines))
57
self.index_lines.extend([True]*len(self.cur_insert_lines))
65
58
self.cur_insert_lines = []
66
59
self.cur_insert_len = 0
69
62
# Flush out anything pending
70
63
self._flush_insert()
71
64
line_len = len(line)
72
for start_index in range(0, line_len, 127):
65
for start_index in xrange(0, line_len, 127):
73
66
next_len = min(127, line_len - start_index)
74
self.out_lines.append(int2byte(next_len))
67
self.out_lines.append(chr(next_len))
75
68
self.index_lines.append(False)
76
self.out_lines.append(line[start_index:start_index + next_len])
69
self.out_lines.append(line[start_index:start_index+next_len])
77
70
# We don't index long lines, because we won't be able to match
78
71
# a line split across multiple inserts anyway
79
72
self.index_lines.append(False)
116
109
self.line_offsets = []
117
110
self.endpoint = 0
118
111
self._matching_lines = {}
119
self.extend_lines(lines, [True] * len(lines))
112
self.extend_lines(lines, [True]*len(lines))
121
114
def _update_matching_lines(self, new_lines, index):
122
115
matches = self._matching_lines
123
116
start_idx = len(self.lines)
124
117
if len(new_lines) != len(index):
125
118
raise AssertionError('The number of lines to be indexed does'
126
' not match the index/don\'t index flags: %d != %d'
127
% (len(new_lines), len(index)))
119
' not match the index/don\'t index flags: %d != %d'
120
% (len(new_lines), len(index)))
128
121
for idx, do_index in enumerate(index):
133
126
matches[line].add(start_idx + idx)
135
matches[line] = {start_idx + idx}
128
matches[line] = set([start_idx + idx])
137
130
def get_matches(self, line):
138
131
"""Return the lines which match the line in right."""
172
165
# This is the first match in a range
173
166
prev_locations = locations
175
locations = None # Consumed
168
locations = None # Consumed
177
170
# We have a match started, compare to see if any of the
178
171
# current matches can be continued
182
175
# At least one of the regions continues to match
183
176
prev_locations = set(next_locations)
185
locations = None # Consumed
178
locations = None # Consumed
187
180
# All current regions no longer match.
188
181
# This line does still match something, just not at the
228
221
if block[-1] < min_match_bytes:
229
222
# This block may be a 'short' block, check
230
223
old_start, new_start, range_len = block
231
matched_bytes = sum(map(len, lines[new_start:new_start + range_len]))
224
matched_bytes = sum(map(len,
225
lines[new_start:new_start + range_len]))
232
226
if matched_bytes < min_match_bytes:
234
228
if block is not None:
251
245
self.line_offsets.append(endpoint)
252
246
if len(self.line_offsets) != len(self.lines):
253
247
raise AssertionError('Somehow the line offset indicator'
254
' got out of sync with the line counter.')
248
' got out of sync with the line counter.')
255
249
self.endpoint = endpoint
def _flush_insert(self, start_linenum, end_linenum,
                  new_lines, out_lines, index_lines):
    """Add an 'insert' request to the data stream.

    The content of ``new_lines[start_linenum:end_linenum]`` is joined into
    one bytestring and emitted in pieces of at most 127 bytes. Each piece
    is preceded by a length marker built with ``int2byte``.

    :param start_linenum: Index of the first line of ``new_lines`` to insert.
    :param end_linenum: Index one past the last line to insert.
    :param new_lines: Sequence of bytestrings holding the new content.
    :param out_lines: List that receives the length markers and content.
    :param index_lines: List of flags kept parallel to ``out_lines``;
        ``False`` for each length marker, ``True`` for each content line.
    """
    bytes_to_insert = b''.join(new_lines[start_linenum:end_linenum])
    insert_length = len(bytes_to_insert)
    # Each insert instruction is at most 127 bytes long
    for start_byte in range(0, insert_length, 127):
        insert_count = min(insert_length - start_byte, 127)
        out_lines.append(int2byte(insert_count))
        # Don't index the 'insert' instruction
        index_lines.append(False)
        insert = bytes_to_insert[start_byte:start_byte + insert_count]
        # Re-split the chunk so out_lines keeps holding line-shaped entries.
        as_lines = osutils.split_lines(insert)
        out_lines.extend(as_lines)
        index_lines.extend([True] * len(as_lines))
273
267
def _flush_copy(self, old_start_linenum, num_lines,
274
268
out_lines, index_lines):
280
274
num_bytes = stop_byte - first_byte
281
275
# The data stream allows >64kB in a copy, but to match the compiled
282
276
# code, we will also limit it to a 64kB copy
283
for start_byte in range(first_byte, stop_byte, 64 * 1024):
284
num_bytes = min(64 * 1024, stop_byte - start_byte)
277
for start_byte in xrange(first_byte, stop_byte, 64*1024):
278
num_bytes = min(64*1024, stop_byte - start_byte)
285
279
copy_bytes = encode_copy_instruction(start_byte, num_bytes)
286
280
out_lines.append(copy_bytes)
287
281
index_lines.append(False)
289
def make_delta(self, new_lines, bytes_length, soft=False):
283
def make_delta(self, new_lines, bytes_length=None, soft=False):
290
284
"""Compute the delta for this content versus the original content."""
285
if bytes_length is None:
286
bytes_length = sum(map(len, new_lines))
291
287
# reserved for content type, content length
292
out_lines = [b'', b'', encode_base128_int(bytes_length)]
288
out_lines = ['', '', encode_base128_int(bytes_length)]
293
289
index_lines = [False, False, False]
294
290
output_handler = _OutputHandler(out_lines, index_lines,
295
291
self._MIN_MATCH_BYTES)
300
296
for old_start, new_start, range_len in blocks:
301
297
if new_start != current_line_num:
302
298
# non-matching region, insert the content
303
output_handler.add_insert(
304
new_lines[current_line_num:new_start])
299
output_handler.add_insert(new_lines[current_line_num:new_start])
305
300
current_line_num = new_start + range_len
307
302
# Convert the line based offsets into byte based offsets
317
312
def encode_base128_int(val):
318
313
"""Convert an integer into a 7-bit lsb encoding."""
321
316
while val >= 0x80:
322
data.append((val | 0x80) & 0xFF)
317
bytes.append(chr((val | 0x80) & 0xFF))
328
def decode_base128_int(data):
319
bytes.append(chr(val))
320
return ''.join(bytes)
323
def decode_base128_int(bytes):
329
324
"""Decode an integer from a 7-bit lsb encoding."""
333
bval = indexbytes(data, offset)
328
bval = ord(bytes[offset])
334
329
while bval >= 0x80:
335
330
val |= (bval & 0x7F) << shift
338
bval = indexbytes(data, offset)
333
bval = ord(bytes[offset])
339
334
val |= bval << shift
341
336
return val, offset
365
360
base_byte = length & 0xff
367
362
copy_command |= copy_bit
368
copy_bytes.append(int2byte(base_byte))
363
copy_bytes.append(chr(base_byte))
370
copy_bytes[0] = int2byte(copy_command)
371
return b''.join(copy_bytes)
365
copy_bytes[0] = chr(copy_command)
366
return ''.join(copy_bytes)
374
369
def decode_copy_instruction(bytes, cmd, pos):
393
offset = indexbytes(bytes, pos)
388
offset = ord(bytes[pos])
396
offset = offset | (indexbytes(bytes, pos) << 8)
391
offset = offset | (ord(bytes[pos]) << 8)
399
offset = offset | (indexbytes(bytes, pos) << 16)
394
offset = offset | (ord(bytes[pos]) << 16)
402
offset = offset | (indexbytes(bytes, pos) << 24)
397
offset = offset | (ord(bytes[pos]) << 24)
405
length = indexbytes(bytes, pos)
400
length = ord(bytes[pos])
408
length = length | (indexbytes(bytes, pos) << 8)
403
length = length | (ord(bytes[pos]) << 8)
411
length = length | (indexbytes(bytes, pos) << 16)
406
length = length | (ord(bytes[pos]) << 16)
def make_delta(source_bytes, target_bytes):
    """Create a delta from source to target.

    :param source_bytes: The basis content, as ``bytes``.
    :param target_bytes: The content the delta should produce, as ``bytes``.
    :return: The delta as a single ``bytes`` object.
    :raises TypeError: If either argument is not a ``bytes`` instance.
    """
    # Validate both inputs up front so a text string is rejected with a
    # clear message instead of failing somewhere inside the delta code.
    if not isinstance(source_bytes, bytes):
        raise TypeError('source is not bytes')
    if not isinstance(target_bytes, bytes):
        raise TypeError('target is not bytes')
    line_locations = LinesDeltaIndex(osutils.split_lines(source_bytes))
    # The second element of the returned pair is not needed here.
    delta, _ = line_locations.make_delta(osutils.split_lines(target_bytes),
                                         bytes_length=len(target_bytes))
    return b''.join(delta)
430
425
def apply_delta(basis, delta):
431
426
"""Apply delta to this object to become new_version_id."""
432
if not isinstance(basis, bytes):
433
raise TypeError('basis is not bytes')
434
if not isinstance(delta, bytes):
435
raise TypeError('delta is not bytes')
427
if type(basis) is not str:
428
raise TypeError('basis is not a str')
429
if type(delta) is not str:
430
raise TypeError('delta is not a str')
436
431
target_length, pos = decode_base128_int(delta)
438
433
len_delta = len(delta)
439
434
while pos < len_delta:
440
cmd = indexbytes(delta, pos)
435
cmd = ord(delta[pos])
443
438
offset, length, pos = decode_copy_instruction(delta, cmd, pos)
446
441
raise ValueError('data would copy bytes past the'
448
443
lines.append(basis[offset:last])
449
else: # Insert of 'cmd' bytes
444
else: # Insert of 'cmd' bytes
451
446
raise ValueError('Command == 0 not supported yet')
452
lines.append(delta[pos:pos + cmd])
447
lines.append(delta[pos:pos+cmd])
454
data = b''.join(lines)
455
if len(data) != target_length:
449
bytes = ''.join(lines)
450
if len(bytes) != target_length:
456
451
raise ValueError('Delta claimed to be %d long, but ended up'
457
452
' %d long' % (target_length, len(bytes)))
461
456
def apply_delta_to_source(source, delta_start, delta_end):