/brz/remove-bazaar : revision 1429

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/store/compressed_text.py

Committer: Robert Collins
Date: 2005-10-09 23:42:12 UTC
Revision ID: robertc@robertcollins.net-20051009234212-7973344d900afb0b

merge in niemeyers prefixed-store patch

files added:
.bzrignore

.rsyncexclude

HACKING

Makefile

NEWS

NEWS.developers

README

TODO

build-api

bzr-man.py

bzrlib

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/changeset.py

bzrlib/check.py

bzrlib/clone.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/identitymap.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/mdiff.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_core.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/conflicts.py

bzrlib/progress.py

bzrlib/revfile.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/selftest

bzrlib/selftest/HTTPTestUtil.py

bzrlib/selftest/TestUtil.py

bzrlib/selftest/__init__.py

bzrlib/selftest/blackbox.py

bzrlib/selftest/plugins.py

bzrlib/selftest/test_ancestry.py

bzrlib/selftest/test_bad_files.py

bzrlib/selftest/test_commit.py

bzrlib/selftest/test_commit_merge.py

bzrlib/selftest/test_conflicts.py

bzrlib/selftest/test_merge_core.py

bzrlib/selftest/test_parent.py

bzrlib/selftest/test_revision_info.py

bzrlib/selftest/test_smart_add.py

bzrlib/selftest/test_upgrade.py

bzrlib/selftest/test_weave.py

bzrlib/selftest/test_xml.py

bzrlib/selftest/testbranch.py

bzrlib/selftest/testdiff.py

bzrlib/selftest/testfetch.py

bzrlib/selftest/testgraph.py

bzrlib/selftest/testhashcache.py

bzrlib/selftest/testidentitymap.py

bzrlib/selftest/testinv.py

bzrlib/selftest/testlog.py

bzrlib/selftest/testmerge.py

bzrlib/selftest/testmerge3.py

bzrlib/selftest/testrevision.py

bzrlib/selftest/testrevisionnamespaces.py

bzrlib/selftest/testsampler.py

bzrlib/selftest/teststatus.py

bzrlib/selftest/teststore.py

bzrlib/selftest/testtransactions.py

bzrlib/selftest/testtransport.py

bzrlib/selftest/testworkingtree.py

bzrlib/selftest/treeshape.py

bzrlib/selftest/versioning.py

bzrlib/selftest/whitebox.py

bzrlib/shellcomplete.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/compressed_text.py

bzrlib/store/text.py

bzrlib/store/weave.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/http.py

bzrlib/transport/local.py

bzrlib/tree.py

bzrlib/ui.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/urlgrabber

bzrlib/util/urlgrabber/__init__.py

bzrlib/util/urlgrabber/byterange.py

bzrlib/util/urlgrabber/grabber.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/util/urlgrabber/mirror.py

bzrlib/util/urlgrabber/progress.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml.py

bzrlib/xml4.py

bzrlib/xml5.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/split-join-files.txt

doc/supportability.txt

doc/svk.txt

doc/switch-in-branch.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

notes

notes/inventory-v2-sample.xml

notes/inventory-v2.rnc

notes/new-inventory-sample.xml

notes/performance.txt

notes/revfile.txt

notes/schemas.xml

patches

patches/cache-remote-revisions.diff

patches/cache_weave_inclusions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

setup.py

testbzr

tools

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/history2revfiles.py

tools/http_client.py

tools/weavebench.py

tools/weavemerge.sh

tutorial.txt

files removed:
.bzrignore

COPYING

HACKING

INSTALL

Makefile

NEWS

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

cache.py

commands.py

commit.py

config.py

dir.py

errors.py

fetch.py

help.py

hg.py

info.py

inventory.py

mapping.py

notes

notes/git-serve.txt

notes/mapping.txt

notes/roundtripping.txt

object_store.py

push.py

refs.py

remote.py

repository.py

revspec.py

roundtrip.py

send.py

server.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_cache.py

tests/test_dir.py

tests/test_fetch.py

tests/test_mapping.py

tests/test_object_store.py

tests/test_push.py

tests/test_refs.py

tests/test_remote.py

tests/test_repository.py

tests/test_revspec.py

tests/test_roundtrip.py

tests/test_transportgit.py

transportgit.py

tree.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/store/compressed_text.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""

An implementation of the primary storage type CompressedTextStore.

This store keeps compressed versions of the full text. It does not

do any sort of delta compression.

"""

import os, tempfile, gzip

import bzrlib.store

from bzrlib.store import hash_prefix

from bzrlib.trace import mutter

from bzrlib.errors import BzrError, FileExists

from StringIO import StringIO

from stat import ST_SIZE, ST_MODE, S_ISDIR

class CompressedTextStore(bzrlib.store.TransportStore):
    """Store that holds files indexed by unique names.

    Files can be added, but not modified once they are in.  Typically
    the hash is used as the name, or something else known to be unique,
    such as a UUID.

    Files are stored gzip compressed, with no delta compression.

    >>> st = ScratchCompressedTextStore()

    >>> st.add(StringIO('hello'), 'aa')
    >>> 'aa' in st
    True
    >>> 'foo' in st
    False

    You are not allowed to add an id that is already present.

    Entries can be retrieved as files, which may then be read.

    >>> st.add(StringIO('goodbye'), '123123')
    >>> st['123123'].read()
    'goodbye'
    """

    def __init__(self, transport, prefixed=False):
        """Create a store on top of ``transport``.

        transport -- transport object all entries are read from and
            written to.
        prefixed -- when true, each entry's relative path is prefixed
            with ``hash_prefix(fileid)`` (see ``_relpath``).
        """
        super(CompressedTextStore, self).__init__(transport)
        self._prefixed = prefixed

    def _check_fileid(self, fileid):
        """Raise ValueError if ``fileid`` contains a path separator."""
        if '\\' in fileid or '/' in fileid:
            raise ValueError("invalid store id %r" % fileid)

    def _relpath(self, fileid):
        """Return the transport-relative path used to store ``fileid``.

        The id is validated first; the result always ends in ".gz" and,
        for prefixed stores, starts with ``hash_prefix(fileid)``.
        """
        self._check_fileid(fileid)
        if self._prefixed:
            return hash_prefix(fileid) + fileid + ".gz"
        else:
            return fileid + ".gz"

    def _open_gzip(self, f):
        """Wrap the raw transport file ``f`` in a decompressing GzipFile.

        gzip.GzipFile.read() requires a tell() function, but some
        transports return objects that cannot seek, so those are
        buffered in a StringIO instead.
        """
        if hasattr(f, 'tell'):
            return gzip.GzipFile(mode='rb', fileobj=f)
        from cStringIO import StringIO
        sio = StringIO(f.read())
        return gzip.GzipFile(mode='rb', fileobj=sio)

    def add(self, f, fileid):
        """Add contents of a file into the store.

        f -- An open file, or file-like object.  A plain string is also
            accepted and is treated as the content itself.
        fileid -- Name to store the content under.

        Raises ValueError (via _relpath) for an invalid id and BzrError
        if the id is already present in the store.
        """
        # TODO: implement an add_multi which can do some of its
        #       own pipelining, and possibly take advantage of
        #       transport.put_multi(). The problem is that
        #       entries potentially need to be compressed as they
        #       are received, which implies translation, which
        #       means it isn't as straightforward as we would like.
        from cStringIO import StringIO
        from bzrlib.osutils import pumpfile

        mutter("add store entry %r" % (fileid,))
        if isinstance(f, basestring):
            f = StringIO(f)

        fn = self._relpath(fileid)
        if self._transport.has(fn):
            raise BzrError("store %r already contains id %r"
                           % (self._transport.base, fileid))

        if self._prefixed:
            # The prefix directory may already exist from an earlier add.
            try:
                self._transport.mkdir(hash_prefix(fileid))
            except FileExists:
                pass

        sio = StringIO()
        gf = gzip.GzipFile(mode='wb', fileobj=sio)
        # if pumpfile handles files that don't fit in ram,
        # so will this function
        # (f is always file-like here: strings were wrapped above.)
        pumpfile(f, gf)
        gf.close()
        sio.seek(0)
        self._transport.put(fn, sio)

    def _do_copy(self, other, to_copy, pb, permit_failure=False):
        """Copy the entries ``to_copy`` from ``other`` into this store.

        The raw-byte fast path is only taken when ``other`` is a
        CompressedTextStore with the same ``_prefixed`` layout, because
        _copy_multi_text uses this store's relative paths on both
        transports.  Anything else is delegated to the base class.
        """
        if (isinstance(other, CompressedTextStore)
                and other._prefixed == self._prefixed):
            return self._copy_multi_text(other, to_copy, pb,
                                         permit_failure=permit_failure)
        return super(CompressedTextStore, self)._do_copy(
            other, to_copy, pb, permit_failure=permit_failure)

    def _copy_multi_text(self, other, to_copy, pb,
                         permit_failure=False):
        """Copy already-compressed entries straight between transports.

        Returns a ``(count, failed)`` pair: the value returned by the
        transport's copy_to and the set of ids skipped because
        ``other`` does not have them (always empty unless
        ``permit_failure`` is true).
        """
        # Because of _transport, we can no longer assume
        # that they are on the same filesystem, we can, however
        # assume that we only need to copy the exact bytes,
        # we don't need to process the files.

        failed = set()
        if permit_failure:
            new_to_copy = set()
            for fileid, has in zip(to_copy, other.has(to_copy)):
                if has:
                    new_to_copy.add(fileid)
                else:
                    failed.add(fileid)
            to_copy = new_to_copy
            #mutter('_copy_multi_text copying %s, failed %s' % (to_copy, failed))

        paths = [self._relpath(fileid) for fileid in to_copy]
        count = other._transport.copy_to(paths, self._transport, pb=pb)
        assert count == len(to_copy)
        return count, failed

    def __contains__(self, fileid):
        """Return whether the transport has an entry for ``fileid``."""
        fn = self._relpath(fileid)
        return self._transport.has(fn)

    def has(self, fileids, pb=None):
        """Return True/False for each entry in fileids.

        :param fileids: A List or generator yielding file ids.
        :return: A generator or list returning True/False for each entry.
        """
        relpaths = (self._relpath(fid) for fid in fileids)
        return self._transport.has_multi(relpaths, pb=pb)

    def get(self, fileids, permit_failure=False, pb=None):
        """Yield a file for each requested entry, in request order.

        With ``permit_failure`` true, entries the transport reports as
        missing are not fetched and ``None`` is yielded in their place.

        TODO: Write some tests to make sure that permit_failure is
              handled correctly.

        TODO: What should the exception be for a missing file?
              KeyError, or NoSuchFile?
        """

        # This next code gets a bit hairy because it can allow
        # to not request a file which doesn't seem to exist.
        # Also, the same fileid may be requested twice, so we
        # can't just build up a map.
        rel_paths = [self._relpath(fid) for fid in fileids]
        is_requested = []

        #mutter('CompressedTextStore.get(permit_failure=%s)' % permit_failure)
        if permit_failure:
            existing_paths = []
            for path, has in zip(rel_paths,
                                 self._transport.has_multi(rel_paths)):
                if has:
                    existing_paths.append(path)
                    is_requested.append(True)
                else:
                    is_requested.append(False)
            #mutter('Retrieving %s out of %s' % (existing_paths, rel_paths))
        else:
            #mutter('Retrieving all %s' % (rel_paths, ))
            existing_paths = rel_paths
            is_requested = [True for x in rel_paths]

        count = 0
        for f in self._transport.get_multi(existing_paths, pb=pb):
            assert count < len(is_requested)
            # Emit placeholders for skipped entries preceding this file.
            while not is_requested[count]:
                yield None
                count += 1
            yield self._open_gzip(f)
            count += 1

        # Placeholders for skipped entries after the last fetched file.
        while count < len(is_requested):
            yield None
            count += 1

    def _iter_relpaths(self):
        """Yield ``(relpath, stat)`` for every non-directory entry.

        Starts from the transport's '.' listing; the contents of each
        directory are inserted at the front of the queue, so they are
        visited before the directory's remaining siblings.
        """
        transport = self._transport
        queue = list(transport.list_dir('.'))
        while queue:
            relpath = queue.pop(0)
            st = transport.stat(relpath)
            if S_ISDIR(st[ST_MODE]):
                for i, basename in enumerate(transport.list_dir(relpath)):
                    queue.insert(i, relpath+'/'+basename)
            else:
                yield relpath, st

    def __iter__(self):
        """Yield the base name of each stored entry, minus any ".gz"."""
        for relpath, st in self._iter_relpaths():
            if relpath.endswith(".gz"):
                yield os.path.basename(relpath)[:-3]
            else:
                yield os.path.basename(relpath)

    def __len__(self):
        """Return the number of non-directory entries on the transport."""
        return len(list(self._iter_relpaths()))

    def __getitem__(self, fileid):
        """Returns a file reading from a particular entry.

        Raises KeyError if the transport cannot provide the entry.
        """
        fn = self._relpath(fileid)
        # This will throw if the file doesn't exist.
        try:
            f = self._transport.get(fn)
        except (KeyboardInterrupt, SystemExit):
            # Never disguise an interrupt or interpreter exit as a
            # missing entry.
            raise
        except:
            raise KeyError('This store (%s) does not contain %s' % (self, fileid))

        return self._open_gzip(f)

    def total_size(self):
        """Return (count, bytes)

        This is the (compressed) size stored on disk, not the size of
        the content."""
        total = 0
        count = 0
        for relpath, st in self._iter_relpaths():
            count += 1
            total += st[ST_SIZE]

        return count, total

263

264

265

class ScratchCompressedTextStore(CompressedTextStore):
    """Self-destructing test subclass of CompressedTextStore.

    The Store only exists for the lifetime of the Python object.
    Obviously you should not put anything precious in it.
    """

    def __init__(self):
        """Create an unprefixed store backed by a fresh temp directory."""
        # Absolute import, matching the other bzrlib imports in this
        # module; the bare "transport" name only resolves when the
        # bzrlib directory itself is on sys.path.
        # NOTE(review): assumes bzrlib/transport/__init__.py exposes
        # the ``transport`` factory -- confirm.
        from bzrlib.transport import transport
        t = transport(tempfile.mkdtemp())
        super(ScratchCompressedTextStore, self).__init__(t)

    def __del__(self):
        """Delete the top-level entries, then the directory itself."""
        self._transport.delete_multi(self._transport.list_dir('.'))
        # NOTE(review): treats the transport base as a local
        # filesystem path -- confirm against the transport in use.
        os.rmdir(self._transport.base)
        mutter("%r destroyed" % self)

280

Older »