127
127
def line_delta_iter(self, new_lines):
128
128
"""Generate line-based delta from this content to new_lines."""
129
new_texts = [text for origin, text in new_lines._lines]
130
old_texts = [text for origin, text in self._lines]
129
new_texts = new_lines.text()
130
old_texts = self.text()
131
131
s = KnitSequenceMatcher(None, old_texts, new_texts)
132
for op in s.get_opcodes():
132
for tag, i1, i2, j1, j2 in s.get_opcodes():
135
# ofrom oto length data
136
yield (op[1], op[2], op[4]-op[3], new_lines._lines[op[3]:op[4]])
135
# ofrom, oto, length, data
136
yield i1, i2, j2 - j1, new_lines._lines[j1:j2]
138
138
def line_delta(self, new_lines):
    """Return the line-based delta from this content to new_lines as a list.

    Everything produced by line_delta_iter(new_lines) is collected, in
    order, into a single list.
    """
    delta_hunks = self.line_delta_iter(new_lines)
    return list(delta_hunks)
307
307
self.writable = (access_mode == 'w')
308
308
self.delta = delta
310
self._max_delta_chain = 200
310
312
self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
311
313
access_mode, create=create, file_mode=file_mode,
312
314
create_parent_dir=create_parent_dir, delay_create=delay_create,
320
322
return '%s(%s)' % (self.__class__.__name__,
321
323
self.transport.abspath(self.filename))
325
def _check_should_delta(self, first_parents):
326
"""Iterate back through the parent listing, looking for a fulltext.
328
This is used when we want to decide whether to add a delta or a new
329
fulltext. It searches for _max_delta_chain parents. When it finds a
330
fulltext parent, it sees if the total size of the deltas leading up to
331
it is large enough to indicate that we want a new full text anyway.
333
Return True if we should create a new delta, False if we should use a
338
delta_parents = first_parents
339
for count in xrange(self._max_delta_chain):
340
parent = delta_parents[0]
341
method = self._index.get_method(parent)
342
pos, size = self._index.get_position(parent)
343
if method == 'fulltext':
347
delta_parents = self._index.get_parents(parent)
349
# We couldn't find a fulltext, so we must create a new one
352
return fulltext_size > delta_size
323
354
def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):
324
355
"""See VersionedFile._add_delta()."""
325
356
self._check_add(version_id, []) # should we check the lines ?
357
388
# To speed the extract of texts the delta chain is limited
358
389
# to a fixed number of deltas. This should minimize both
359
390
# I/O and the time spent applying deltas.
361
delta_parents = [delta_parent]
363
parent = delta_parents[0]
364
method = self._index.get_method(parent)
365
if method == 'fulltext':
367
delta_parents = self._index.get_parents(parent)
369
if method == 'line-delta':
370
# did not find a fulltext in the delta limit.
371
# just do a normal insertion.
391
# The window is now a maximum of 200 deltas, and there is also a check
392
# that the total compressed size of the deltas is smaller than the
393
# compressed size of the fulltext.
394
if not self._check_should_delta([delta_parent]):
395
# We don't want a delta here, just do a normal insertion.
372
396
return super(KnitVersionedFile, self)._add_delta(version_id,
523
547
for parent_id in parents:
524
548
merge_content = self._get_content(parent_id, parent_texts)
525
seq = KnitSequenceMatcher(None, merge_content.text(), content.text())
549
seq = patiencediff.PatienceSequenceMatcher(
550
None, merge_content.text(), content.text())
526
551
if delta_seq is None:
527
552
# setup a delta seq to reuse.
539
564
reference_content = self._get_content(parents[0], parent_texts)
540
565
new_texts = content.text()
541
566
old_texts = reference_content.text()
542
delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)
567
delta_seq = patiencediff.PatienceSequenceMatcher(
568
None, old_texts, new_texts)
543
569
return self._make_line_delta(delta_seq, content)
545
571
def _make_line_delta(self, delta_seq, new_content):
666
692
# To speed the extract of texts the delta chain is limited
667
693
# to a fixed number of deltas. This should minimize both
668
694
# I/O and the time spent applying deltas.
670
delta_parents = present_parents
672
parent = delta_parents[0]
673
method = self._index.get_method(parent)
674
if method == 'fulltext':
676
delta_parents = self._index.get_parents(parent)
678
if method == 'line-delta':
695
delta = self._check_should_delta(present_parents)
681
697
lines = self.factory.make(lines, version_id)
682
698
if delta or (self.factory.annotated and len(present_parents) > 0):
823
839
data_pos, length = self._index.get_position(version_id)
824
840
version_id_records.append((version_id, data_pos, length))
827
842
total = len(version_id_records)
828
pb.update('Walking content.', count, total)
829
for version_id, data, sha_value in \
830
self._data.read_records_iter(version_id_records):
831
pb.update('Walking content.', count, total)
843
for version_idx, (version_id, data, sha_value) in \
844
enumerate(self._data.read_records_iter(version_id_records)):
845
pb.update('Walking content.', version_idx, total)
832
846
method = self._index.get_method(version_id)
833
847
version_idx = self._index.lookup(version_id)
834
848
assert method in ('fulltext', 'line-delta')
1255
1268
encode_utf8 = cache_utf8.encode
1256
for version_id, options, pos, size, parents in versions:
1257
line = "\n%s %s %s %s %s :" % (encode_utf8(version_id),
1261
self._version_list_to_index(parents))
1262
assert isinstance(line, str), \
1263
'content must be utf-8 encoded: %r' % (line,)
1265
if not self._need_to_create:
1266
self._transport.append_bytes(self._filename, ''.join(lines))
1269
sio.write(self.HEADER)
1270
sio.writelines(lines)
1272
self._transport.put_file_non_atomic(self._filename, sio,
1273
create_parent_dir=self._create_parent_dir,
1274
mode=self._file_mode,
1275
dir_mode=self._dir_mode)
1276
self._need_to_create = False
1278
# cache after writing, so that a failed write leads to missing cache
1279
# entries not extra ones. XXX TODO: RBC 20060502 in the event of a
1280
# failure, reload the index or flush it or some such, to prevent
1281
# writing records that did complete twice.
1282
for version_id, options, pos, size, parents in versions:
1283
self._cache_version(version_id, options, pos, size, parents)
1269
orig_history = self._history[:]
1270
orig_cache = self._cache.copy()
1273
for version_id, options, pos, size, parents in versions:
1274
line = "\n%s %s %s %s %s :" % (encode_utf8(version_id),
1278
self._version_list_to_index(parents))
1279
assert isinstance(line, str), \
1280
'content must be utf-8 encoded: %r' % (line,)
1282
self._cache_version(version_id, options, pos, size, parents)
1283
if not self._need_to_create:
1284
self._transport.append_bytes(self._filename, ''.join(lines))
1287
sio.write(self.HEADER)
1288
sio.writelines(lines)
1290
self._transport.put_file_non_atomic(self._filename, sio,
1291
create_parent_dir=self._create_parent_dir,
1292
mode=self._file_mode,
1293
dir_mode=self._dir_mode)
1294
self._need_to_create = False
1296
# If any problems happen, restore the original values and re-raise
1297
self._history = orig_history
1298
self._cache = orig_cache
1285
1301
def has_version(self, version_id):
    """Return True when version_id is in the index, False otherwise.

    Membership is decided by looking version_id up in self._cache.
    """
    cached_versions = self._cache
    return version_id in cached_versions