@@ -36,8 +36,18 @@
         will sometimes start over and compress the whole list to get tighter
         packing. We get diminishing returns after a while, so this limits the
         number of times we will try.
+        In testing, some values for 100k nodes::
+
+            _max_repack     time    final node count
+
     :cvar _default_min_compression_size: The expected minimum compression.
         While packing nodes into the page, we won't Z_SYNC_FLUSH until we have
         received this much input data. This saves time, because we don't bloat
         the result with SYNC entries (and then need to repack), but if it is
-        set too high we will accept data that will never fit.
+        set too high we will accept data that will never fit and trigger a
+        fault later.
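
The SYNC-entry bloat mentioned above is easy to reproduce with plain
zlib, independent of this class. In this sketch (data and sizes are
made up for illustration) the second stream does what a packer with a
minimum compression size of zero would do, paying the overhead of an
empty stored block on every flush::

    import zlib

    data = [('row-%06d\n' % i).encode('ascii') for i in range(1000)]

    # One stream, flushed only once at the end.
    c = zlib.compressobj()
    whole = b''.join(c.compress(d) for d in data) + c.flush()

    # The same data with a Z_SYNC_FLUSH after every write.
    c = zlib.compressobj()
    pieces = []
    for d in data:
        pieces.append(c.compress(d))
        pieces.append(c.flush(zlib.Z_SYNC_FLUSH))
    synced = b''.join(pieces) + c.flush()

    print(len(whole), len(synced))  # synced is measurably larger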
@@ -85,22 +97,41 @@
         return self.bytes_list, self.unused_bytes, nulls_needed
 
     def _recompress_all_bytes_in(self, extra_bytes=None):
+        """Recompress the current bytes_in, and optionally more.
+
+        :param extra_bytes: Optional, if supplied we will try to add it with
+            Z_SYNC_FLUSH
+        :return: (bytes_out, compressor, alt_compressed)
+            bytes_out   is the compressed bytes returned from the compressor
+            compressor  An object with everything packed in so far, and
+                        Z_SYNC_FLUSH called.
+            alt_compressed  If the compressor supports copy(), then this is a
+                            snapshot just before extra_bytes is added.
+                            It is (bytes_out, compressor) as well.
+                            The idea is if you find you cannot fit the new
+                            bytes, you don't have to start over.
+                            And if you *can* you don't have to Z_SYNC_FLUSH
+                            again.
+        """
         compressor = zlib.compressobj()
         bytes_out = []
         append = bytes_out.append
         compress = compressor.compress
         for accepted_bytes in self.bytes_in:
             out = compress(accepted_bytes)
             if out:
                 append(out)
+        alt_compressed = None
         if extra_bytes:
+            if self.compressor_has_copy:
+                alt_compressed = (list(bytes_out), compressor.copy())
             out = compress(extra_bytes)
             if out:
                 append(out)
             out = compressor.flush(Z_SYNC_FLUSH)
             if out:
                 append(out)
-        return bytes_out, compressor
+        return bytes_out, compressor, alt_compressed
 
     def write(self, bytes):
         """Write some bytes to the chunk.
@@ -156,25 +187,39 @@
             # We are over budget, try to squeeze this in without any
             # Z_SYNC_FLUSH calls
             self.num_repack += 1
-            bytes_out, compressor = self._recompress_all_bytes_in(bytes)
-            this_len = sum(map(len, bytes_out))
-            if this_len + 10 > capacity:
+            if self.num_repack >= self._max_repack:
+                this_len = None
+                alt_compressed = None
+            else:
+                (bytes_out, compressor,
+                 alt_compressed) = self._recompress_all_bytes_in(bytes)
+                this_len = sum(map(len, bytes_out))
+            if this_len is None or this_len + 10 > capacity:
                 # No way we can add anymore, we need to re-pack because our
                 # compressor is now out of sync
-                bytes_out, compressor = self._recompress_all_bytes_in()
+                if alt_compressed is None:
+                    bytes_out, compressor, _ = self._recompress_all_bytes_in()
+                else:
+                    bytes_out, compressor = alt_compressed
                 self.compressor = compressor
                 self.bytes_list = bytes_out
                 self.unused_bytes = bytes
             else:
                 # This fits when we pack it tighter, so use the new packing
+                if alt_compressed is not None:
+                    # We know it will fit, so put it into another
+                    # compressor without Z_SYNC_FLUSH
+                    bytes_out, compressor = alt_compressed
+                    compressor.compress(bytes)
+                # There is one Z_SYNC_FLUSH call in
+                # _recompress_all_bytes_in
                 self.compressor = compressor
                 self.bytes_in.append(bytes)
                 self.bytes_list = bytes_out
-                # There is one Z_SYNC_FLUSH call in
-                # _recompress_all_bytes_in
         else:
             # It fit, so mark it added
             self.bytes_in.append(bytes)
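
Taken together, the repack path of write() makes at most one
recompression attempt per over-budget call, keeps a snapshot so a
failed attempt does not force recompressing a second time, and
reserves a little slop (the +10, presumably headroom for closing out
the stream) before declaring overflow. A hypothetical, self-contained
try_add() helper (not part of this module) shows the same control
flow::

    import zlib

    def try_add(bytes_in, extra, capacity):
        """Recompress bytes_in plus extra; report whether it fits."""
        compressor = zlib.compressobj()
        bytes_out = [compressor.compress(b) for b in bytes_in]
        # Snapshot that has never seen `extra`, kept for rollback.
        snapshot = (list(bytes_out), compressor.copy())
        bytes_out.append(compressor.compress(extra))
        bytes_out.append(compressor.flush(zlib.Z_SYNC_FLUSH))
        if sum(map(len, bytes_out)) + 10 > capacity:
            # Too big even when packed tightly: roll back so the
            # caller is left with a compressor that is still in sync.
            bytes_out, compressor = snapshot
            return bytes_out, compressor, False
        return bytes_out, compressor, True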