132
131
self.endpoint = 0
133
132
self.input_bytes = 0
134
133
self.labels_deltas = {}
135
self._last_delta_index = None
134
self._delta_index = _groupcompress_pyx.DeltaIndex()
137
136
def compress(self, key, chunks, expected_sha, soft=False):
138
137
"""Compress lines with label key.
170
169
new_chunks = ['label: %s\nsha1: %s\n' % (label, sha1)]
171
# PROF: 5s to this constant extra joining
172
if self._last_delta_index is not None:
173
delta_index = self._last_delta_index
175
source_text = ''.join(self.lines)
176
# XXX: We have a few possibilities here. We could consider a few
177
# different 'previous' windows, such as only the initial text,
178
# we could do something with the 'just inserted' text we could
179
# try a delta against whatever the last delta we computed,
180
# (the idea being we just computed the delta_index, so we
181
# re-use it here, and see if that is good enough, etc)
182
# PROF: 15s to building the delta index
183
delta_index = _groupcompress_pyx.make_delta_index(source_text)
184
# PROF: only 0.67s to actually create a delta
185
delta = delta_index.make_delta(target_text)
170
delta = self._delta_index.make_delta(target_text)
186
171
if (delta is None
187
172
or len(delta) > len(target_text) / 2):
188
173
# We can't delta (perhaps source_text is empty)
189
174
# so mark this as an insert
191
176
new_chunks = ['f']
192
new_chunks.extend(chunks)
194
178
new_chunks.insert(0, 'fulltext\n')
195
179
new_chunks.append('len: %s\n' % (input_len,))
196
new_chunks.extend(chunks)
197
self._last_delta_index = None
180
unadded_bytes = sum(map(len, new_chunks))
181
self._delta_index.add_source(target_text, unadded_bytes)
182
new_chunks.append(target_text)
200
new_chunks = ['d', delta]
202
187
new_chunks.insert(0, 'delta\n')
203
188
new_chunks.append('len: %s\n' % (len(delta),))
204
new_chunks.append(delta)
206
self._last_delta_index = delta_index
189
unadded_bytes = sum(map(len, new_chunks))
190
new_chunks.append(delta)
207
191
delta_start = (self.endpoint, len(self.lines))
208
192
self.output_chunks(new_chunks)
209
193
self.input_bytes += input_len