/brz/remove-bazaar : revision 0.200.1036

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/tuned_gzip.py

Committer: Jelmer Vernooij
Date: 2010-09-11 17:48:45 UTC
mto: (0.312.1 master) (6883.23.1 bundle-git)
mto: This revision was merged to the branch mainline in revision 6960.
Revision ID: jelmer@samba.org-20100911174845-ro06bb6gg6ws8jit

More work on roundtrip push support.

files added:
.bzrignore

COPYING

HACKING

INSTALL

Makefile

NEWS

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

cache.py

commands.py

commit.py

config.py

dir.py

errors.py

fetch.py

help.py

hg.py

info.py

inventory.py

mapping.py

notes

notes/git-serve.txt

notes/mapping.txt

notes/roundtripping.txt

object_store.py

push.py

refs.py

remote.py

repository.py

revspec.py

roundtrip.py

send.py

server.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_cache.py

tests/test_dir.py

tests/test_fetch.py

tests/test_mapping.py

tests/test_object_store.py

tests/test_push.py

tests/test_refs.py

tests/test_remote.py

tests/test_repository.py

tests/test_revspec.py

tests/test_roundtrip.py

tests/test_transportgit.py

transportgit.py

tree.py

versionedfiles.py

workingtree.py

files removed:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

HACKING

INSTALL

Makefile

NEWS

NEWS.developers

README

TODO

build-api

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/info.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_bundle.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/response.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/tree.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/version.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32console.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/README.1st

doc/centralized_workflow.txt

doc/configuration.txt

doc/default.css

doc/index.txt

doc/plugins.txt

doc/setting_up_email.txt

doc/specifying_revisions.txt

doc/tutorial.txt

doc/using_aliases.txt

generate_docs.py

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/rst2html.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/ostools.py

tools/win32/start_bzr.bat

Show diffs side-by-side

added added

removed removed

bzrlib/tuned_gzip.py

# Written by Robert Collins <robert.collins@canonical.com>

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Bzrlib specific gzip tunings. We plan to feed these to the upstream gzip."""

from cStringIO import StringIO

# make GzipFile faster:

import gzip

from gzip import U32, LOWU32, FEXTRA, FCOMMENT, FNAME, FHCRC

import sys

import struct

import zlib

# we want a \n preserved, break on \n only splitlines.

import bzrlib

__all__ = ["GzipFile"]

class GzipFile(gzip.GzipFile):

"""Knit tuned version of GzipFile.

This is based on the following lsprof stats:

python 2.4 stock GzipFile write:

58971 0 5644.3090 2721.4730 gzip:193(write)

+58971 0 1159.5530 1159.5530 +<built-in method compress>

+176913 0 987.0320 987.0320 +<len>

+58971 0 423.1450 423.1450 +<zlib.crc32>

+58971 0 353.1060 353.1060 +<method 'write' of 'cStringIO.

StringO' objects>

tuned GzipFile write:

58971 0 4477.2590 2103.1120 bzrlib.knit:1250(write)

+58971 0 1297.7620 1297.7620 +<built-in method compress>

+58971 0 406.2160 406.2160 +<zlib.crc32>

+58971 0 341.9020 341.9020 +<method 'write' of 'cStringIO.

StringO' objects>

+58971 0 328.2670 328.2670 +<len>

Yes, it's only 1.6 seconds, but they add up.

"""

def _add_read_data(self, data):

# 4169 calls in 183

# temp var for len(data) and switch to +='s.

# 4169 in 139

len_data = len(data)

self.crc = zlib.crc32(data, self.crc)

self.extrabuf += data

self.extrasize += len_data

self.size += len_data

def _write_gzip_header(self):

"""A tuned version of gzip._write_gzip_header

We have some extra constrains that plain Gzip does not.

1) We want to write the whole blob at once. rather than multiple

calls to fileobj.write().

2) We never have a filename

3) We don't care about the time

"""

self.fileobj.write(

'\037\213' # self.fileobj.write('\037\213') # magic header

'\010' # self.fileobj.write('\010') # compression method

# fname = self.filename[:-3]

# flags = 0

# if fname:

# flags = FNAME

'\x00' # self.fileobj.write(chr(flags))

'\0\0\0\0' # write32u(self.fileobj, long(time.time()))

'\002' # self.fileobj.write('\002')

'\377' # self.fileobj.write('\377')

# if fname:

'' # self.fileobj.write(fname + '\000')

)

def _read(self, size=1024):

# various optimisations:

# reduces lsprof count from 2500 to

# 8337 calls in 1272, 365 internal

if self.fileobj is None:

raise EOFError, "Reached EOF"

if self._new_member:

100

# If the _new_member flag is set, we have to

101

# jump to the next member, if there is one.

102

103

# First, check if we're at the end of the file;

104

# if so, it's time to stop; no more members to read.

105

next_header_bytes = self.fileobj.read(10)

106

if next_header_bytes == '':

107

raise EOFError, "Reached EOF"

108

109

self._init_read()

110

self._read_gzip_header(next_header_bytes)

111

self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)

112

self._new_member = False

113

114

# Read a chunk of data from the file

115

buf = self.fileobj.read(size)

116

117

# If the EOF has been reached, flush the decompression object

118

# and mark this object as finished.

119

120

if buf == "":

121

self._add_read_data(self.decompress.flush())

122

assert len(self.decompress.unused_data) >= 8, "what does flush do?"

123

self._gzip_tail = self.decompress.unused_data[0:8]

124

self._read_eof()

125

# tell the driving read() call we have stuffed all the data

126

# in self.extrabuf

127

raise EOFError, 'Reached EOF'

128

129

self._add_read_data(self.decompress.decompress(buf))

130

131

if self.decompress.unused_data != "":

132

# Ending case: we've come to the end of a member in the file,

133

# so seek back to the start of the data for the next member which

134

# is the length of the decompress objects unused data - the first

135

# 8 bytes for the end crc and size records.

136

137

# so seek back to the start of the unused data, finish up

138

# this member, and read a new gzip header.

139

# (The number of bytes to seek back is the length of the unused

140

# data, minus 8 because those 8 bytes are part of this member.

141

seek_length = len (self.decompress.unused_data) - 8

142

if seek_length > 0:

143

# we read too much data

144

self.fileobj.seek(-seek_length, 1)

145

self._gzip_tail = self.decompress.unused_data[0:8]

146

elif seek_length < 0:

147

# we haven't read enough to check the checksum.

148

assert -8 < seek_length, "too great a seek."

149

buf = self.fileobj.read(-seek_length)

150

self._gzip_tail = self.decompress.unused_data + buf

151

else:

152

self._gzip_tail = self.decompress.unused_data

153

154

# Check the CRC and file size, and set the flag so we read

155

# a new member on the next call

156

self._read_eof()

157

self._new_member = True

158

159

def _read_eof(self):

160

"""tuned to reduce function calls and eliminate file seeking:

161

pass 1:

162

reduces lsprof count from 800 to 288

163

4168 in 296

164

avoid U32 call by using struct format L

165

4168 in 200

166

"""

167

# We've read to the end of the file, so we should have 8 bytes of

168

# unused data in the decompressor. If we don't, there is a corrupt file.

169

# We use these 8 bytes to calculate the CRC and the recorded file size.

170

# We then check the that the computed CRC and size of the

171

# uncompressed data matches the stored values. Note that the size

172

# stored is the true file size mod 2**32.

173

assert len(self._gzip_tail) == 8, "gzip trailer is incorrect length."

174

crc32, isize = struct.unpack("<LL", self._gzip_tail)

175

# note that isize is unsigned - it can exceed 2GB

176

if crc32 != U32(self.crc):

177

raise IOError, "CRC check failed %d %d" % (crc32, U32(self.crc))

178

elif isize != LOWU32(self.size):

179

raise IOError, "Incorrect length of data produced"

180

181

def _read_gzip_header(self, bytes=None):

182

"""Supply bytes if the minimum header size is already read.

183

184

:param bytes: 10 bytes of header data.

185

"""

186

"""starting cost: 300 in 3998

187

15998 reads from 3998 calls

188

final cost 168

189

"""

190

if bytes is None:

191

bytes = self.fileobj.read(10)

192

magic = bytes[0:2]

193

if magic != '\037\213':

194

raise IOError, 'Not a gzipped file'

195

method = ord(bytes[2:3])

196

if method != 8:

197

raise IOError, 'Unknown compression method'

198

flag = ord(bytes[3:4])

199

# modtime = self.fileobj.read(4) (bytes [4:8])

200

# extraflag = self.fileobj.read(1) (bytes[8:9])

201

# os = self.fileobj.read(1) (bytes[9:10])

202

# self.fileobj.read(6)

203

204

if flag & FEXTRA:

205

# Read & discard the extra field, if present

206

xlen = ord(self.fileobj.read(1))

207

xlen = xlen + 256*ord(self.fileobj.read(1))

208

self.fileobj.read(xlen)

209

if flag & FNAME:

210

# Read and discard a null-terminated string containing the filename

211

while True:

212

s = self.fileobj.read(1)

213

if not s or s=='\000':

214

break

215

if flag & FCOMMENT:

216

# Read and discard a null-terminated string containing a comment

217

while True:

218

s = self.fileobj.read(1)

219

if not s or s=='\000':

220

break

221

if flag & FHCRC:

222

self.fileobj.read(2) # Read & discard the 16-bit header CRC

223

224

def readline(self, size=-1):
    """Return the next line, reading at most ``size`` bytes.

    Data is fetched through self.read() in chunks that start at 200
    bytes (or ``size`` if smaller) and double each round.  Reading
    stops at the first newline, when ``size`` bytes have been
    collected, or at EOF.  Whatever was read beyond the returned line
    is pushed back onto the front of ``extrabuf``.

    :param size: maximum number of bytes to return; a negative value
        means no limit.
    :return: the line, including its trailing newline when one was
        found; '' at EOF.

    Tuned to remove buffer length calls in _unread; it also removes
    multiple len(c) calls and inlines _unread.  Profiling history from
    the original author: total savings - lsprof 5800 to 5300.  Phase 2:
    4168 calls in 2233, 8176 calls to read() in 1684; changing the min
    chunk size to 200 halved all the cache misses, leading to a drop
    to 4168 calls in 1977, 4168 call to read() in 1646 - i.e. just
    reduced the function call overhead.  May be worth keeping.
    """
    if size < 0:
        size = sys.maxint
    bufs = []
    readsize = min(200, size)   # Read from the file in small chunks
    while True:
        if size == 0:
            return "".join(bufs)    # Return resulting line

        # c is the chunk, len_c the number of bytes read.
        c = self.read(readsize)
        len_c = len(c)
        i = c.find('\n')
        # size is always an integer by this point.  Set i to break out
        # of the loop under two conditions: 1) there's no newline, and
        # the chunk is larger than size, or 2) there is a newline, but
        # the resulting line would be longer than 'size'.
        if i == -1 and len_c > size:
            i = size - 1
        elif size <= i:
            i = size - 1

        if i >= 0 or c == '':
            # i >= 0: we have a newline or hit the size limit above.
            # c == '': it's EOF.
            bufs.append(c[:i+1])    # Add portion of last chunk
            # -- inlined self._unread --
            # Push back the rest of the chunk.  c[i+1:] holds exactly
            # len_c - i - 1 bytes; the counters must move by that same
            # amount or extrasize drifts away from len(extrabuf).
            pushed_back = len_c - i - 1
            self.extrabuf = c[i+1:] + self.extrabuf
            self.extrasize = pushed_back + self.extrasize
            self.offset -= pushed_back
            # -- end inlined self._unread --
            return ''.join(bufs)    # Return resulting line

        # Append chunk to list, decrease 'size', grow the next read.
        bufs.append(c)
        size = size - len_c
        readsize = min(size, readsize * 2)

276

277

def readlines(self, sizehint=0):
    """Return every remaining line as a list.

    ``sizehint`` is accepted but not used: the whole remaining content
    is read with a single read(-1) call and split by a StringIO.

    python's gzip routine uses sizehint, and there is a more efficient
    way to honor it than python uses.  But it is even more efficient to
    just read the entire thing and use cStringIO to split into lines,
    which avoids all the buffer manipulation.

    Profiling note from the original author: lsprof changed from 4168
    calls in 5472 with 32000 calls to readline() to 4168 calls in 417.
    """
    return StringIO(self.read(-1)).readlines()

294

295

def _unread(self, buf, len_buf=None):

296

"""tuned to remove unneeded len calls.

297

298

because this is such an inner routine in readline, and readline is

299

in many inner loops, this has been inlined into readline().

300

301

The len_buf parameter combined with the reduction in len calls dropped

302

the lsprof ms count for this routine on my test data from 800 to 200 -

303

a 75% saving.

304

"""

305

if len_buf is None:

306

len_buf = len(buf)

307

self.extrabuf = buf + self.extrabuf

308

self.extrasize = len_buf + self.extrasize

309

self.offset -= len_buf

310

311

def write(self, data):
    """Compress ``data`` and write the result to ``fileobj``.

    For non-empty data, ``size`` and ``offset`` grow by len(data) and
    the running CRC is updated.  Empty data is a no-op.

    :raises IOError: with errno EBADF if this object was not opened
        for writing.
    :raises ValueError: if ``fileobj`` is None, i.e. the object has
        been closed.
    """
    if self.mode != gzip.WRITE:
        import errno
        raise IOError(errno.EBADF, "write() on read-only GzipFile object")

    if self.fileobj is None:
        raise ValueError("write() on closed GzipFile object")

    data_len = len(data)
    if data_len == 0:
        # Nothing to compress; leave all counters untouched.
        return
    self.size += data_len
    self.crc = zlib.crc32(data, self.crc)
    compressed = self.compress.compress(data)
    self.fileobj.write(compressed)
    self.offset = self.offset + data_len

324

325

def writelines(self, lines):
    """Write a sequence of strings with a single write() call.

    The strings are joined first and handed to write() as one blob.
    Profiling indicated a significant overhead calling write for each
    line; this batch call is a lot faster (4 seconds to 1 seconds for
    the sample upgrades the original author was testing).
    """
    joined = ''.join(lines)
    self.write(joined)

331

332

Older »