bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 
2052.3.2
by John Arbash Meinel
 Change Copyright .. by Canonical to Copyright ... Canonical  | 
1  | 
# Copyright (C) 2005, 2006 Canonical Ltd
 | 
| 
1641.1.1
by Robert Collins
 * Various microoptimisations to knit and gzip - reducing function call  | 
2  | 
# Written by Robert Collins <robert.collins@canonical.com>
 | 
3  | 
#
 | 
|
4  | 
# This program is free software; you can redistribute it and/or modify
 | 
|
5  | 
# it under the terms of the GNU General Public License as published by
 | 
|
6  | 
# the Free Software Foundation; either version 2 of the License, or
 | 
|
7  | 
# (at your option) any later version.
 | 
|
8  | 
#
 | 
|
9  | 
# This program is distributed in the hope that it will be useful,
 | 
|
10  | 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|
11  | 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|
12  | 
# GNU General Public License for more details.
 | 
|
13  | 
#
 | 
|
14  | 
# You should have received a copy of the GNU General Public License
 | 
|
15  | 
# along with this program; if not, write to the Free Software
 | 
|
| 
4183.7.1
by Sabin Iacob
 update FSF mailing address  | 
16  | 
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
| 
1641.1.1
by Robert Collins
 * Various microoptimisations to knit and gzip - reducing function call  | 
17  | 
|
18  | 
"""Bzrlib specific gzip tunings. We plan to feed these to the upstream gzip."""
 | 
|
19  | 
||
| 
1908.4.12
by John Arbash Meinel
 Minor change to tuned_gzip.  | 
20  | 
from cStringIO import StringIO  | 
| 
1908.4.5
by John Arbash Meinel
 Some small tweaks to knit and tuned_gzip to shave off another couple seconds  | 
21  | 
|
| 
1641.1.1
by Robert Collins
 * Various microoptimisations to knit and gzip - reducing function call  | 
22  | 
# make GzipFile faster:
 | 
23  | 
import gzip  | 
|
| 
3734.2.1
by Vincent Ladeuil
 Fix U32, LOWU32 disapearance in python-2.6.  | 
24  | 
from gzip import FEXTRA, FCOMMENT, FNAME, FHCRC  | 
| 
1641.1.1
by Robert Collins
 * Various microoptimisations to knit and gzip - reducing function call  | 
25  | 
import sys  | 
26  | 
import struct  | 
|
27  | 
import zlib  | 
|
28  | 
||
| 
1666.1.6
by Robert Collins
 Make knit the default format.  | 
29  | 
# we want a \n preserved, break on \n only splitlines.
 | 
30  | 
import bzrlib  | 
|
31  | 
||
| 
2817.3.1
by Robert Collins
 * New helper ``bzrlib.tuned_gzip.bytes_to_gzip`` which takes a byte string  | 
32  | 
__all__ = ["GzipFile", "bytes_to_gzip"]  | 
33  | 
||
34  | 
||
| 
3734.2.1
by Vincent Ladeuil
 Fix U32, LOWU32 disapearance in python-2.6.  | 
35  | 
def U32(i):
    """Return i as an unsigned integer, assuming it fits in 32 bits.

    If it's >= 2GB when viewed as a 32-bit unsigned int, return a long.

    :param i: A signed (or unsigned) integer assumed to fit in 32 bits.
    :return: The same bit pattern reinterpreted as a non-negative integer.
    """
    if i < 0:
        # Wrap negative values into the unsigned 32-bit range.  Plain
        # `1 << 32` replaces the Python-2-only `1L << 32` literal; on
        # Python 2 the shift auto-promotes to long, so behavior is
        # unchanged.
        i += 1 << 32
    return i
|
43  | 
||
44  | 
||
45  | 
def LOWU32(i):
    """Return the low-order 32 bits of an int, as a non-negative int.

    :param i: Any integer (may be negative or exceed 32 bits).
    :return: ``i`` masked to its low 32 bits.
    """
    # `0xFFFFFFFF` replaces the Python-2-only `0xFFFFFFFFL` literal; the
    # mask value is identical and Python 2 promotes to long automatically.
    return i & 0xFFFFFFFF
|
48  | 
||
49  | 
||
| 
2817.3.1
by Robert Collins
 * New helper ``bzrlib.tuned_gzip.bytes_to_gzip`` which takes a byte string  | 
50  | 
def bytes_to_gzip(bytes, factory=zlib.compressobj,
    level=zlib.Z_DEFAULT_COMPRESSION, method=zlib.DEFLATED,
    width=-zlib.MAX_WBITS, mem=zlib.DEF_MEM_LEVEL,
    crc32=zlib.crc32):
    """Create a gzip file containing bytes and return its content.

    :param bytes: A single string of data to compress.
    :param factory: Compressor factory (defaults to zlib.compressobj).
    :param level: Compression level passed to the factory.
    :param method: Compression method passed to the factory.
    :param width: Window size passed to the factory (negative for raw
        deflate, which is what the hand-written gzip framing requires).
    :param mem: Memory level passed to the factory.
    :param crc32: CRC function used for the gzip trailer.
    :return: The complete gzip-framed compressed content as a string.
    """
    # Forward every tuning parameter: previously this wrapper accepted
    # them but called chunks_to_gzip([bytes]) bare, so caller-supplied
    # values were silently ignored.  Defaults are identical, so callers
    # relying on the defaults see no change.
    return chunks_to_gzip([bytes], factory=factory, level=level,
        method=method, width=width, mem=mem, crc32=crc32)
56  | 
||
57  | 
||
58  | 
def chunks_to_gzip(chunks, factory=zlib.compressobj,
    level=zlib.Z_DEFAULT_COMPRESSION, method=zlib.DEFLATED,
    width=-zlib.MAX_WBITS, mem=zlib.DEF_MEM_LEVEL,
    crc32=zlib.crc32):
    """Create a gzip file containing chunks and return its content.

    :param chunks: An iterable of strings. Each string can have arbitrary
        layout.
    """
    # The fixed 10-byte gzip header, pre-joined: magic (\x1f\x8b), deflate
    # method (\x08), no flags, zero mtime, XFL=\x02, OS=\xff ("unknown").
    # No filename is ever emitted.
    pieces = ['\x1f\x8b\x08\x00\x00\x00\x00\x00\x02\xff']
    # A raw compressobj (negative window bits) avoids the small zlib
    # header and trailer that the compress() utility function would add.
    compressor = factory(level, method, width, mem, 0)
    running_crc = 0
    uncompressed_size = 0
    for fragment in chunks:
        running_crc = crc32(fragment, running_crc)
        uncompressed_size += len(fragment)
        compressed = compressor.compress(fragment)
        if compressed:
            pieces.append(compressed)
    pieces.append(compressor.flush())
    # Trailer: CRC and length, each reduced mod 2**32 — the true size may
    # exceed 2GB, or even 4GB.
    pieces.append(struct.pack("<LL", LOWU32(running_crc),
                              LOWU32(uncompressed_size)))
    return ''.join(pieces)
| 
1641.1.1
by Robert Collins
 * Various microoptimisations to knit and gzip - reducing function call  | 
96  | 
|
97  | 
||
98  | 
class GzipFile(gzip.GzipFile):
    """Knit tuned version of GzipFile.

    This is based on the following lsprof stats:
    python 2.4 stock GzipFile write:
    58971      0   5644.3090   2721.4730   gzip:193(write)
    +58971     0   1159.5530   1159.5530   +<built-in method compress>
    +176913    0    987.0320    987.0320   +<len>
    +58971     0    423.1450    423.1450   +<zlib.crc32>
    +58971     0    353.1060    353.1060   +<method 'write' of 'cStringIO.
                                            StringO' objects>
    tuned GzipFile write:
    58971      0   4477.2590   2103.1120   bzrlib.knit:1250(write)
    +58971     0   1297.7620   1297.7620   +<built-in method compress>
    +58971     0    406.2160    406.2160   +<zlib.crc32>
    +58971     0    341.9020    341.9020   +<method 'write' of 'cStringIO.
                                            StringO' objects>
    +58971     0    328.2670    328.2670   +<len>

    Yes, its only 1.6 seconds, but they add up.
    """

    def _add_read_data(self, data):
        """Append decompressed data to the read-ahead buffer and update CRC.

        Performance notes from tuning:
        # 4169 calls in 183
        # temp var for len(data) and switch to +='s.
        # 4169 in 139
        """
        len_data = len(data)
        self.crc = zlib.crc32(data, self.crc)
        self.extrabuf += data
        self.extrasize += len_data
        self.size += len_data

    def _write_gzip_header(self):
        """A tuned version of gzip._write_gzip_header

        We have some extra constrains that plain Gzip does not.
        1) We want to write the whole blob at once. rather than multiple
           calls to fileobj.write().
        2) We never have a filename
        3) We don't care about the time
        """
        # One write of the fixed 10-byte header; the inline comments show
        # the stock gzip.py statements each literal replaces.
        self.fileobj.write(
            '\037\213'   # self.fileobj.write('\037\213') # magic header
            '\010'       # self.fileobj.write('\010')     # compression method
                         # fname = self.filename[:-3]
                         # flags = 0
                         # if fname:
                         #     flags = FNAME
            '\x00'       # self.fileobj.write(chr(flags))
            '\0\0\0\0'   # write32u(self.fileobj, long(time.time()))
            '\002'       # self.fileobj.write('\002')
            '\377'       # self.fileobj.write('\377')
                         # if fname:
            ''           # self.fileobj.write(fname + '\000')
            )

    def _read(self, size=1024):
        """Read and decompress one chunk, handling member boundaries.

        Raises EOFError to tell the driving read() loop there is no more
        data (mirrors the stock gzip.py protocol).

        Performance notes:
        # various optimisations:
        # reduces lsprof count from 2500 to
        # 8337 calls in 1272, 365 internal
        """
        if self.fileobj is None:
            raise EOFError, "Reached EOF"

        if self._new_member:
            # If the _new_member flag is set, we have to
            # jump to the next member, if there is one.
            #
            # First, check if we're at the end of the file;
            # if so, it's time to stop; no more members to read.
            next_header_bytes = self.fileobj.read(10)
            if next_header_bytes == '':
                raise EOFError, "Reached EOF"

            self._init_read()
            self._read_gzip_header(next_header_bytes)
            # Raw deflate stream: negative window bits skip zlib framing.
            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
            self._new_member = False

        # Read a chunk of data from the file
        buf = self.fileobj.read(size)

        # If the EOF has been reached, flush the decompression object
        # and mark this object as finished.

        if buf == "":
            self._add_read_data(self.decompress.flush())
            # After flush the 8-byte CRC/size trailer must be sitting in
            # unused_data, or the file is truncated/corrupt.
            if len(self.decompress.unused_data) < 8:
                raise AssertionError("what does flush do?")
            self._gzip_tail = self.decompress.unused_data[0:8]
            self._read_eof()
            # tell the driving read() call we have stuffed all the data
            # in self.extrabuf
            raise EOFError, 'Reached EOF'

        self._add_read_data(self.decompress.decompress(buf))

        if self.decompress.unused_data != "":
            # Ending case: we've come to the end of a member in the file,
            # so seek back to the start of the data for the next member which
            # is the length of the decompress objects unused data - the first
            # 8 bytes for the end crc and size records.
            #
            # so seek back to the start of the unused data, finish up
            # this member, and read a new gzip header.
            # (The number of bytes to seek back is the length of the unused
            # data, minus 8 because those 8 bytes are part of this member.
            seek_length = len(self.decompress.unused_data) - 8
            if seek_length > 0:
                # we read too much data
                self.fileobj.seek(-seek_length, 1)
                self._gzip_tail = self.decompress.unused_data[0:8]
            elif seek_length < 0:
                # we haven't read enough to check the checksum.
                if not (-8 < seek_length):
                    raise AssertionError("too great a seek")
                buf = self.fileobj.read(-seek_length)
                self._gzip_tail = self.decompress.unused_data + buf
            else:
                self._gzip_tail = self.decompress.unused_data

            # Check the CRC and file size, and set the flag so we read
            # a new member on the next call
            self._read_eof()
            self._new_member = True

    def _read_eof(self):
        """tuned to reduce function calls and eliminate file seeking:
        pass 1:
        reduces lsprof count from 800 to 288
        4168 in 296
        avoid U32 call by using struct format L
        4168 in 200

        :raises IOError: if the recorded CRC or size does not match the
            data actually decompressed.
        """
        # We've read to the end of the file, so we should have 8 bytes of
        # unused data in the decompressor. If we don't, there is a corrupt file.
        # We use these 8 bytes to calculate the CRC and the recorded file size.
        # We then check the that the computed CRC and size of the
        # uncompressed data matches the stored values.  Note that the size
        # stored is the true file size mod 2**32.
        if not (len(self._gzip_tail) == 8):
            raise AssertionError("gzip trailer is incorrect length.")
        crc32, isize = struct.unpack("<LL", self._gzip_tail)
        # note that isize is unsigned - it can exceed 2GB
        if crc32 != U32(self.crc):
            raise IOError, "CRC check failed %d %d" % (crc32, U32(self.crc))
        elif isize != LOWU32(self.size):
            raise IOError, "Incorrect length of data produced"

    def _read_gzip_header(self, bytes=None):
        """Supply bytes if the minimum header size is already read.

        :param bytes: 10 bytes of header data.
        :raises IOError: if the magic bytes or compression method are wrong.

        Performance notes:
        starting cost: 300 in 3998
        15998 reads from 3998 calls
        final cost 168
        """
        if bytes is None:
            bytes = self.fileobj.read(10)
        magic = bytes[0:2]
        if magic != '\037\213':
            raise IOError, 'Not a gzipped file'
        method = ord(bytes[2:3])
        if method != 8:
            raise IOError, 'Unknown compression method'
        flag = ord(bytes[3:4])
        # The remaining fixed header fields are carried in `bytes` and
        # simply ignored, avoiding the stock module's per-field reads:
        # modtime = self.fileobj.read(4) (bytes [4:8])
        # extraflag = self.fileobj.read(1) (bytes[8:9])
        # os = self.fileobj.read(1) (bytes[9:10])
        # self.fileobj.read(6)

        if flag & FEXTRA:
            # Read & discard the extra field, if present
            xlen = ord(self.fileobj.read(1))
            xlen = xlen + 256*ord(self.fileobj.read(1))
            self.fileobj.read(xlen)
        if flag & FNAME:
            # Read and discard a null-terminated string containing the filename
            while True:
                s = self.fileobj.read(1)
                if not s or s=='\000':
                    break
        if flag & FCOMMENT:
            # Read and discard a null-terminated string containing a comment
            while True:
                s = self.fileobj.read(1)
                if not s or s=='\000':
                    break
        if flag & FHCRC:
            self.fileobj.read(2)     # Read & discard the 16-bit header CRC

    def readline(self, size=-1):
        """Tuned to remove buffer length calls in _unread and...

        also removes multiple len(c) calls, inlines _unread,
        total savings - lsprof 5800 to 5300
        phase 2:
        4168 calls in 2233
        8176 calls to read() in 1684
        changing the min chunk size to 200 halved all the cache misses
        leading to a drop to:
        4168 calls in 1977
        4168 call to read() in 1646
        - i.e. just reduced the function call overhead. May be worth
          keeping.
        """
        if size < 0: size = sys.maxint
        bufs = []
        readsize = min(200, size)    # Read from the file in small chunks
        while True:
            if size == 0:
                return "".join(bufs) # Return resulting line

            # c is the chunk
            c = self.read(readsize)
            # number of bytes read
            len_c = len(c)
            i = c.find('\n')
            if size is not None:
                # We set i=size to break out of the loop under two
                # conditions: 1) there's no newline, and the chunk is
                # larger than size, or 2) there is a newline, but the
                # resulting line would be longer than 'size'.
                if i==-1 and len_c > size: i=size-1
                elif size <= i: i = size -1

            if i >= 0 or c == '':
                # if i>= 0 we have a newline or have triggered the above
                # if size is not None condition.
                # if c == '' its EOF.
                bufs.append(c[:i+1])    # Add portion of last chunk
                # -- inlined self._unread --
                ## self._unread(c[i+1:], len_c - i)   # Push back rest of chunk
                self.extrabuf = c[i+1:] + self.extrabuf
                self.extrasize = len_c - i + self.extrasize
                self.offset -= len_c - i
                # -- end inlined self._unread --
                return ''.join(bufs)    # Return resulting line

            # Append chunk to list, decrease 'size',
            bufs.append(c)
            size = size - len_c
            readsize = min(size, readsize * 2)

    def readlines(self, sizehint=0):
        """Return all remaining lines; `sizehint` is accepted but ignored.

        # optimise to avoid all the buffer manipulation
        # lsprof changed from:
        # 4168 calls in 5472 with 32000 calls to readline()
        # to :
        # 4168 calls in 417.
        # Negative numbers result in reading all the lines
        """
        # python's gzip routine uses sizehint. This is a more efficient way
        # than python uses to honor it. But it is even more efficient to
        # just read the entire thing and use cStringIO to split into lines.
        # if sizehint <= 0:
        #     sizehint = -1
        # content = self.read(sizehint)
        # return bzrlib.osutils.split_lines(content)
        content = StringIO(self.read(-1))
        return content.readlines()

    def _unread(self, buf, len_buf=None):
        """tuned to remove unneeded len calls.

        because this is such an inner routine in readline, and readline is
        in many inner loops, this has been inlined into readline().

        The len_buf parameter combined with the reduction in len calls dropped
        the lsprof ms count for this routine on my test data from 800 to 200 -
        a 75% saving.

        :param buf: Data to push back onto the read-ahead buffer.
        :param len_buf: len(buf), if the caller already knows it.
        """
        if len_buf is None:
            len_buf = len(buf)
        self.extrabuf = buf + self.extrabuf
        self.extrasize = len_buf + self.extrasize
        self.offset -= len_buf

    def write(self, data):
        """Compress `data` and write it to the underlying file object.

        :raises IOError: if the file was opened read-only.
        :raises ValueError: if the file has been closed.
        """
        if self.mode != gzip.WRITE:
            import errno
            raise IOError(errno.EBADF, "write() on read-only GzipFile object")

        if self.fileobj is None:
            raise ValueError, "write() on closed GzipFile object"
        data_len = len(data)
        if data_len > 0:
            self.size = self.size + data_len
            self.crc = zlib.crc32(data, self.crc)
            self.fileobj.write( self.compress.compress(data) )
            self.offset += data_len

    def writelines(self, lines):
        """Write an iterable of strings as one batched write() call.

        # profiling indicated a significant overhead
        # calling write for each line.
        # this batch call is a lot faster :).
        # (4 seconds to 1 seconds for the sample upgrades I was testing).
        """
        self.write(''.join(lines))
|
397  | 
||
398  |