/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/chunk_writer.py

  • Committer: John Arbash Meinel
  • Date: 2008-08-20 22:44:19 UTC
  • mto: This revision was merged to the branch mainline in revision 3644.
  • Revision ID: john@arbash-meinel.com-20080820224419-j0mtnzs2myy1n1y1
Collect some info on the space/time tradeoff for _max_repack.
With the test data, we pretty much always hit _max_repack before we
get everything fully packed.

Show diffs side-by-side

added added

removed removed

Lines of Context:
36
36
        will sometimes start over and compress the whole list to get tighter
37
37
        packing. We get diminishing returns after a while, so this limits the
38
38
        number of times we will try.
 
39
        In testing, some values for 100k nodes::
 
40
 
 
41
            _max_repack     time        final node count
 
42
             1               8.0s       704
 
43
             2               9.2s       491
 
44
             3              10.6s       430
 
45
             4              12.5s       406
 
46
             5              13.9s       395
 
47
            20              17.7s       390
39
48
    :cvar _default_min_compression_size: The expected minimum compression.
40
49
        While packing nodes into the page, we won't Z_SYNC_FLUSH until we have
41
50
        received this much input data. This saves time, because we don't bloat
42
51
        the result with SYNC entries (and then need to repack), but if it is
43
 
        set too high we will accept data that will never fit.
 
52
        set too high we will accept data that will never fit and trigger a
 
53
        fault later.
44
54
    """
45
55
 
46
56
    _max_repack = 2
65
75
        self.reserved_size = reserved
66
76
        self.min_compress_size = self._default_min_compression_size
67
77
        self.num_zsync = 0
 
78
        self.compressor_has_copy = (getattr(self.compressor, 'copy', None)
 
79
                                    is not None)
68
80
 
69
81
    def finish(self):
70
82
        """Finish the chunk.
85
97
        return self.bytes_list, self.unused_bytes, nulls_needed
86
98
 
87
99
    def _recompress_all_bytes_in(self, extra_bytes=None):
 
100
        """Recompress the current bytes_in, and optionally more.
 
101
 
 
102
        :param extra_bytes: Optional, if supplied we will try to add it with
 
103
            Z_SYNC_FLUSH
 
104
        :return: (bytes_out, compressor, alt_compressed)
 
105
            bytes_out   is the compressed bytes returned from the compressor
 
106
            compressor  An object with everything packed in so far, and
 
107
                        Z_SYNC_FLUSH called.
 
108
            alt_compressed  If the compressor supports copy(), then this is a
 
109
                            snapshot just before extra_bytes is added.
 
110
                            It is (bytes_out, compressor) as well.
 
111
                            The idea is if you find you cannot fit the new
 
112
                            bytes, you don't have to start over.
 
113
                            And if you *can* you don't have to Z_SYNC_FLUSH
 
114
                            yet.
 
115
        """
88
116
        compressor = zlib.compressobj()
89
117
        bytes_out = []
90
118
        append = bytes_out.append
93
121
            out = compress(accepted_bytes)
94
122
            if out:
95
123
                append(out)
 
124
        alt_compressed = None
96
125
        if extra_bytes:
 
126
            if self.compressor_has_copy:
 
127
                alt_compressed = (list(bytes_out), compressor.copy())
97
128
            out = compress(extra_bytes)
98
129
            if out:
99
130
                append(out)
100
131
            out = compressor.flush(Z_SYNC_FLUSH)
101
132
            if out:
102
133
                append(out)
103
 
        return bytes_out, compressor
 
134
        return bytes_out, compressor, alt_compressed
104
135
 
105
136
    def write(self, bytes):
106
137
        """Write some bytes to the chunk.
156
187
                # We are over budget, try to squeeze this in without any
157
188
                # Z_SYNC_FLUSH calls
158
189
                self.num_repack += 1
159
 
                bytes_out, compressor = self._recompress_all_bytes_in(bytes)
160
 
                this_len = sum(map(len, bytes_out))
161
 
                if this_len + 10 > capacity:
 
190
                if False and self.num_repack >= self._max_repack:
 
191
                    this_len = None
 
192
                    alt_compressed = None
 
193
                else:
 
194
                    (bytes_out, compressor,
 
195
                     alt_compressed) = self._recompress_all_bytes_in(bytes)
 
196
                    this_len = sum(map(len, bytes_out))
 
197
                if this_len is None or this_len + 10 > capacity:
162
198
                    # No way we can add anymore, we need to re-pack because our
163
199
                    # compressor is now out of sync
164
 
                    bytes_out, compressor = self._recompress_all_bytes_in()
 
200
                    if alt_compressed is None:
 
201
                        bytes_out, compressor, _ = self._recompress_all_bytes_in()
 
202
                    else:
 
203
                        bytes_out, compressor = alt_compressed
165
204
                    self.compressor = compressor
166
205
                    self.bytes_list = bytes_out
167
206
                    self.unused_bytes = bytes
168
 
                    self.num_zsync = 0
169
207
                    return True
170
208
                else:
171
209
                    # This fits when we pack it tighter, so use the new packing
 
210
                    if alt_compressed is not None:
 
211
                        # We know it will fit, so put it into another
 
212
                        # compressor without Z_SYNC_FLUSH
 
213
                        bytes_out, compressor = alt_compressed
 
214
                        compressor.compress(bytes)
 
215
                        self.num_zsync = 0
 
216
                    else:
 
217
                        # There is one Z_SYNC_FLUSH call in
 
218
                        # _recompress_all_bytes_in
 
219
                        self.num_zsync = 1
172
220
                    self.compressor = compressor
173
221
                    self.bytes_in.append(bytes)
174
222
                    self.bytes_list = bytes_out
175
 
                    # There is one Z_SYNC_FLUSH call in
176
 
                    # _recompress_all_bytes_in
177
 
                    self.num_zsync = 1
178
223
            else:
179
224
                # It fit, so mark it added
180
225
                self.bytes_in.append(bytes)