/brz/remove-bazaar : revision 1056

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/revfile.py

Committer: Martin Pool
Date: 2005-08-12 15:45:20 UTC
Revision ID: mbp@sourcefrog.net-20050812154520-169e3364f569a554

doc

files added:
.bzrignore

.rsyncexclude

HACKING

NEWS

README

TODO

build-api

bzr-man.py

bzrlib

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/changeset.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/mdiff.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_core.py

bzrlib/meta_store.py

bzrlib/missing.py

bzrlib/newinventory.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/progress.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/revision.py

bzrlib/selftest

bzrlib/selftest/__init__.py

bzrlib/selftest/blackbox.py

bzrlib/selftest/plugins.py

bzrlib/selftest/testbranch.py

bzrlib/selftest/testdiff.py

bzrlib/selftest/testhashcache.py

bzrlib/selftest/testinv.py

bzrlib/selftest/testlog.py

bzrlib/selftest/testmerge3.py

bzrlib/selftest/testrevision.py

bzrlib/selftest/testrevisionnamespaces.py

bzrlib/selftest/teststatus.py

bzrlib/selftest/versioning.py

bzrlib/selftest/whitebox.py

bzrlib/status.py

bzrlib/store.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/upgrade.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/split-join-files.txt

doc/supportability.txt

doc/svk.txt

doc/switch-in-branch.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

effbot

effbot/__init__.py

effbot/org

effbot/org/__init__.py

effbot/org/gzip_consumer.py

effbot/org/http_client.py

effbot/org/http_manager.py

elementtree

elementtree/ElementTree.py

elementtree/__init__.py

notes

notes/new-inventory-sample.xml

notes/performance.txt

notes/revfile.txt

patches

patches/annotate3.patch

patches/annotate4.patch

patches/cache-remote-revisions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

patches/pending-merge.patch

patches/plugins-no-plugins.patch

patches/progress.diff

patches/symlink-support.patch

plugins

plugins/changeset

plugins/changeset/__init__.py

plugins/changeset/apply_changeset.py

plugins/changeset/common.py

plugins/changeset/gen_changeset.py

plugins/changeset/read_changeset.py

plugins/checkperms

setup.py

testbzr

testsweet.py

tools

tools/convertfile.py

tools/convertinv.py

tools/testweave.py

tools/weavebench.py

tools/weavemerge.sh

tutorial.txt

urlgrabber

urlgrabber/__init__.py

urlgrabber/byterange.py

urlgrabber/grabber.py

urlgrabber/keepalive.py

urlgrabber/mirror.py

urlgrabber/progress.py

files removed:
.bzrignore

COPYING

INSTALL

Makefile

README

TODO

__init__.py

branch.py

dir.py

dulwich

dulwich/.bzrignore

dulwich/COPYING

dulwich/Makefile

dulwich/README

dulwich/bin

dulwich/bin/dul-daemon

dulwich/bin/dul-receive-pack

dulwich/bin/dul-upload-pack

dulwich/bin/dulwich

dulwich/docs

dulwich/docs/protocol.txt

dulwich/dulwich

dulwich/dulwich/__init__.py

dulwich/dulwich/client.py

dulwich/dulwich/commit.py

dulwich/dulwich/errors.py

dulwich/dulwich/objects.py

dulwich/dulwich/pack.py

dulwich/dulwich/protocol.py

dulwich/dulwich/repo.py

dulwich/dulwich/server.py

dulwich/dulwich/tests

dulwich/dulwich/tests/__init__.py

dulwich/dulwich/tests/data

dulwich/dulwich/tests/data/blobs

dulwich/dulwich/tests/data/blobs/6f670c0fb53f9463760b7295fbb814e965fb20c8

dulwich/dulwich/tests/data/blobs/954a536f7819d40e6f637f849ee187dd10066349

dulwich/dulwich/tests/data/blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391

dulwich/dulwich/tests/data/commits

dulwich/dulwich/tests/data/commits/0d89f20333fbb1d2f3a94da77f4981373d8f4310

dulwich/dulwich/tests/data/commits/5dac377bdded4c9aeb8dff595f0faeebcc8498cc

dulwich/dulwich/tests/data/commits/60dacdc733de308bb77bb76ce0fb0f9b44c9769e

dulwich/dulwich/tests/data/packs

dulwich/dulwich/tests/data/packs/pack-bc63ddad95e7321ee734ea11a7a62d314e0d7481.idx

dulwich/dulwich/tests/data/packs/pack-bc63ddad95e7321ee734ea11a7a62d314e0d7481.pack

dulwich/dulwich/tests/data/repos

dulwich/dulwich/tests/data/repos/a

dulwich/dulwich/tests/data/repos/a/.git

dulwich/dulwich/tests/data/repos/a/.git/HEAD

dulwich/dulwich/tests/data/repos/a/.git/index

dulwich/dulwich/tests/data/repos/a/.git/objects

dulwich/dulwich/tests/data/repos/a/.git/objects/2a

dulwich/dulwich/tests/data/repos/a/.git/objects/2a/72d929692c41d8554c07f6301757ba18a65d91

dulwich/dulwich/tests/data/repos/a/.git/objects/4e

dulwich/dulwich/tests/data/repos/a/.git/objects/4e/f30bbfe26431a69c3820d3a683df54d688f2ec

dulwich/dulwich/tests/data/repos/a/.git/objects/4f

dulwich/dulwich/tests/data/repos/a/.git/objects/4f/2e6529203aa6d44b5af6e3292c837ceda003f9

dulwich/dulwich/tests/data/repos/a/.git/objects/7d

dulwich/dulwich/tests/data/repos/a/.git/objects/7d/9a07d797595ef11344549b8d08198e48c15364

dulwich/dulwich/tests/data/repos/a/.git/objects/a2

dulwich/dulwich/tests/data/repos/a/.git/objects/a2/96d0bb611188cabb256919f36bc30117cca005

dulwich/dulwich/tests/data/repos/a/.git/objects/a9

dulwich/dulwich/tests/data/repos/a/.git/objects/a9/0fa2d900a17e99b433217e988c4eb4a2e9a097

dulwich/dulwich/tests/data/repos/a/.git/objects/ff

dulwich/dulwich/tests/data/repos/a/.git/objects/ff/d47d45845a8f6576491e1edb97e3fe6a850e7f

dulwich/dulwich/tests/data/repos/a/.git/objects/info

dulwich/dulwich/tests/data/repos/a/.git/objects/pack

dulwich/dulwich/tests/data/repos/a/.git/refs

dulwich/dulwich/tests/data/repos/a/.git/refs/heads

dulwich/dulwich/tests/data/repos/a/.git/refs/heads/master

dulwich/dulwich/tests/data/repos/a/.git/refs/tags

dulwich/dulwich/tests/data/repos/a/a

dulwich/dulwich/tests/data/repos/a/b

dulwich/dulwich/tests/data/repos/a/c

dulwich/dulwich/tests/data/repos/ooo_merge

dulwich/dulwich/tests/data/repos/ooo_merge/.git

dulwich/dulwich/tests/data/repos/ooo_merge/.git/HEAD

dulwich/dulwich/tests/data/repos/ooo_merge/.git/index

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/29

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/29/69be3e8ee1c0222396a5611407e4769f14e54b

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/38

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/38/74e9c60a6d149c44c928140f250d81e6381520

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/6f

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/6f/670c0fb53f9463760b7295fbb814e965fb20c8

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/70

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/76

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/76/01d7f6231db6a57f7bbb79ee52e4d462fd44d1

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/90

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/90/182552c4a85a45ec2a835cadc3451bebdfe870

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/95

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/95/4a536f7819d40e6f637f849ee187dd10066349

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/b2

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/b2/a2766a2879c209ab1176e7e778b81ae422eeaa

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/f5

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/f5/07291b64138b875c28e03469025b1ea20bc614

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/f9

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/f9/e39b120c68182a4ba35349f832d0e4e61f485c

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/fb

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/fb/5b0425c7ce46959bec94d54b9a157645e114f5

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/info

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/pack

dulwich/dulwich/tests/data/repos/ooo_merge/.git/refs

dulwich/dulwich/tests/data/repos/ooo_merge/.git/refs/heads

dulwich/dulwich/tests/data/repos/ooo_merge/.git/refs/heads/master

dulwich/dulwich/tests/data/repos/ooo_merge/.git/refs/tags

dulwich/dulwich/tests/data/repos/ooo_merge/a

dulwich/dulwich/tests/data/repos/ooo_merge/b

dulwich/dulwich/tests/data/repos/ooo_merge/c

dulwich/dulwich/tests/data/repos/simple_merge

dulwich/dulwich/tests/data/repos/simple_merge/.git

dulwich/dulwich/tests/data/repos/simple_merge/.git/HEAD

dulwich/dulwich/tests/data/repos/simple_merge/.git/index

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/0d

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/0d/89f20333fbb1d2f3a94da77f4981373d8f4310

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/1b

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/1b/6318f651a534b38f9c7aedeebbd56c1e896853

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/29

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/29/69be3e8ee1c0222396a5611407e4769f14e54b

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/4c

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/4c/ffe90e0a41ad3f5190079d7c8f036bde29cbe6

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/5d

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/5d/ac377bdded4c9aeb8dff595f0faeebcc8498cc

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/60

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/60/dacdc733de308bb77bb76ce0fb0f9b44c9769e

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/6f

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/6f/670c0fb53f9463760b7295fbb814e965fb20c8

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/70

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/90

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/90/182552c4a85a45ec2a835cadc3451bebdfe870

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/95

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/95/4a536f7819d40e6f637f849ee187dd10066349

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/ab

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/ab/64bbdcc51b170d21588e5c5d391ee5c0c96dfd

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/d4

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/d4/bdad6549dfedf25d3b89d21f506aff575b28a7

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/d8

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/d8/0c186a03f423a81b39df39dc87fd269736ca86

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/e6

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/info

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/pack

dulwich/dulwich/tests/data/repos/simple_merge/.git/refs

dulwich/dulwich/tests/data/repos/simple_merge/.git/refs/heads

dulwich/dulwich/tests/data/repos/simple_merge/.git/refs/heads/master

dulwich/dulwich/tests/data/repos/simple_merge/.git/refs/tags

dulwich/dulwich/tests/data/repos/simple_merge/a

dulwich/dulwich/tests/data/repos/simple_merge/b

dulwich/dulwich/tests/data/repos/simple_merge/d

dulwich/dulwich/tests/data/repos/simple_merge/e

dulwich/dulwich/tests/data/trees

dulwich/dulwich/tests/data/trees/70c190eb48fa8bbb50ddc692a17b44cb781af7f6

dulwich/dulwich/tests/test_objects.py

dulwich/dulwich/tests/test_pack.py

dulwich/dulwich/tests/test_repository.py

dulwich/setup.py

errors.py

fetch.py

foreign

foreign/.bzrignore

foreign/TODO

foreign/__init__.py

foreign/test_versionedfiles.py

foreign/upgrade.py

foreign/versionedfiles.py

mapping.py

remote.py

repository.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_dir.py

tests/test_ids.py

tests/test_repository.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/revfile.py

#! /usr/bin/env python

# based on an idea by Matt Mackall

# modified to squish into bzr by Martin Pool

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Packed file revision storage.

A Revfile holds the text history of a particular source file, such

as Makefile. It can represent a tree of text versions for that

file, allowing for microbranches within a single repository.

This is stored on disk as two files: an index file, and a data file.

The index file is short and always read completely into memory; the

data file is much longer and only the relevant bits of it,

identified by the index file, need to be read.

Each text version is identified by the SHA-1 of the full text of

that version. It also has a sequence number within the file.

The index file has a short header and then a sequence of fixed-length

records:

* byte[20] SHA-1 of text (as binary, not hex)

* uint32 sequence number this is based on, or 0xFFFFFFFF (-1 as unsigned) for full text

* uint32 flags: 1=zlib compressed

* uint32 offset in text file of start

* uint32 length of compressed delta in text file

* uint32[3] reserved

total 48 bytes.

The header is also 48 bytes for tidiness and easy calculation.

Both the index and the text are only ever appended to; a consequence

is that sequence numbers are stable references. But not every

repository in the world will assign the same sequence numbers,

therefore the SHA-1 is the only universally unique reference.

This is meant to scale to hold 100,000 revisions of a single file, by

which time the index file will be ~4.8MB and a bit big to read

sequentially.

Some of the reserved fields could be used to implement a (semi?)

balanced tree indexed by SHA1 so we can much more efficiently find the

index associated with a particular hash. For 100,000 revs we would be

able to find it in about 17 random reads, which is not too bad.

This performs pretty well except when trying to calculate deltas of

really large files. For that the main thing would be to plug in

something faster than difflib, which is after all pure Python.

Another approach is to just store the gzipped full text of big files,

though perhaps that's too perverse?

The iter method here will generally read through the whole index file

in one go. With readahead in the kernel and python/libc (typically

128kB) this means that there should be no seeks and often only one

read() call to get everything into memory.

"""

# TODO: Something like pread() would make this slightly simpler and

# perhaps more efficient.

# TODO: Could also try to mmap things... Might be faster for the

# index in particular?

# TODO: Some kind of faster lookup of SHAs? The bad thing is that probably means

# rewriting existing records, which is not so nice.

# TODO: Something to check that regions identified in the index file

# completely butt up and do not overlap. Strictly it's not a problem

# if there are gaps and that can happen if we're interrupted while

# writing to the datafile. Overlapping would be very bad though.

# TODO: Shouldn't need to lock if we always write in append mode and

# then ftell after writing to see where it went. In any case we

# assume the whole branch is protected by a lock.

import sys, zlib, struct, mdiff, stat, os, sha

from binascii import hexlify, unhexlify

_RECORDSIZE = 48

100

101

_HEADER = "bzr revfile v1\n"

102

_HEADER = _HEADER + ('\xff' * (_RECORDSIZE - len(_HEADER)))

103

_NO_RECORD = 0xFFFFFFFFL

104

105

# fields in the index record

106

I_SHA = 0

107

I_BASE = 1

108

I_FLAGS = 2

109

I_OFFSET = 3

110

I_LEN = 4

111

112

FL_GZIP = 1

113

114

# maximum number of patches in a row before recording a whole text.

115

CHAIN_LIMIT = 25

116

117

118

class RevfileError(Exception):
    """Error raised for misuse of a revfile or for damage found in one."""

120

121

class LimitHitException(Exception):
    """Signal that a delta chain is longer than the caller's limit."""

123

124

class Revfile(object):
    """Append-only store of the text versions of a single file.

    A revfile is a pair of files on disk: ``BASENAME.irev`` holds a header
    followed by fixed-size index records, and ``BASENAME.drev`` holds the
    (possibly zlib-compressed) full texts and deltas that the index
    records point to.

    An instance behaves like a read-only sequence of index records:
    ``len(rf)`` is the number of stored versions, ``rf[i]`` is the
    unpacked index record for version ``i`` and iterating yields every
    record in order.  Each record is a tuple indexed by ``I_SHA``,
    ``I_BASE``, ``I_FLAGS``, ``I_OFFSET`` and ``I_LEN``.
    """

    def __init__(self, basename, mode):
        """Open, or in write mode create, the revfile called basename.

        basename -- path without extension; '.irev' and '.drev' are
            appended to form the index and data file names.
        mode -- 'r' to open read-only, 'w' to open for appending (the
            pair of files is created if neither exists).

        Raises RevfileError if mode is invalid, if only one of the two
        files exists, if the revfile is missing in read mode, or if the
        index header is not the expected one.
        """
        # TODO: Lock file while open

        # TODO: advise of random access

        self.basename = basename

        if mode not in ['r', 'w']:
            raise RevfileError("invalid open mode %r" % mode)
        self.mode = mode

        idxname = basename + '.irev'
        dataname = basename + '.drev'

        idx_exists = os.path.exists(idxname)
        data_exists = os.path.exists(dataname)

        if idx_exists != data_exists:
            raise RevfileError("half-assed revfile")

        if not idx_exists:
            if mode == 'r':
                raise RevfileError("Revfile %r does not exist" % basename)

            self.idxfile = open(idxname, 'w+b')
            self.datafile = open(dataname, 'w+b')

            # NOTE(review): diagnostic output on stdout; it is interleaved
            # with anything else the caller prints there.
            sys.stdout.write('init empty file\n')
            self.idxfile.write(_HEADER)
            self.idxfile.flush()
        else:
            if mode == 'r':
                diskmode = 'rb'
            else:
                diskmode = 'r+b'

            self.idxfile = open(idxname, diskmode)
            self.datafile = open(dataname, diskmode)

            h = self.idxfile.read(_RECORDSIZE)
            if h != _HEADER:
                # Don't leave the handles open on an object that the
                # caller will never receive.
                self.idxfile.close()
                self.datafile.close()
                raise RevfileError("bad header %r in index of %r"
                                   % (h, self.basename))

    def _check_index(self, idx):
        """Raise RevfileError unless idx names an existing record.

        Valid indexes are 0 .. len(self) - 1.
        """
        if idx < 0 or idx >= len(self):
            raise RevfileError("invalid index %r" % idx)

    def _check_write(self):
        """Raise RevfileError unless the revfile was opened with mode 'w'."""
        if self.mode != 'w':
            raise RevfileError("%r is open readonly" % self.basename)

    def find_sha(self, s):
        """Return the index of the record whose SHA-1 is s.

        s must be the 20-byte binary digest, not its hex form.  The index
        is scanned from the start; _NO_RECORD is returned if no record
        matches.
        """
        assert isinstance(s, str)
        assert len(s) == 20

        for idx, idxrec in enumerate(self):
            if idxrec[I_SHA] == s:
                return idx
        return _NO_RECORD

    def _add_compressed(self, text_sha, data, base, compress):
        """Store data, zlib-compressing it first when that is a win.

        Compression is attempted only if compress is true and data is
        longer than 50 bytes, and it is kept only if the result is
        smaller than the input.  Returns the index of the new record.
        """
        flags = 0
        # don't do compression if it's too small; it's unlikely to win
        # enough to be worthwhile
        if compress and len(data) > 50:
            compr_data = zlib.compress(data)
            if len(compr_data) < len(data):
                data = compr_data
                flags = FL_GZIP
        return self._add_raw(text_sha, data, base, flags)

    def _add_raw(self, text_sha, data, base, flags):
        """Append pre-processed data, either a full text or a delta.

        data is written verbatim; any compression must already have been
        applied and be reflected in flags.  Returns the index of the new
        record.
        """
        idx = len(self)
        self.datafile.seek(0, 2)        # to end
        self.idxfile.seek(0, 2)
        assert self.idxfile.tell() == _RECORDSIZE * (idx + 1)
        data_offset = self.datafile.tell()

        assert isinstance(data, str)    # not unicode or anything weird
        assert isinstance(text_sha, str)

        # Build and validate the index entry before touching the data
        # file, so that a failed check leaves both files unchanged.
        entry = text_sha
        entry += struct.pack(">IIII12x", base, flags, data_offset, len(data))
        assert len(entry) == _RECORDSIZE

        # Data first, index second: an interruption between the two
        # leaves unreferenced bytes in the data file, never an index
        # record pointing at missing data.
        self.datafile.write(data)
        self.datafile.flush()

        self.idxfile.write(entry)
        self.idxfile.flush()

        return idx

    def _add_full_text(self, text, text_sha, compress):
        """Add a full text to the file.

        This is not compressed against any reference version.

        Returns the index for that text."""
        return self._add_compressed(text_sha, text, _NO_RECORD, compress)

    # NOT USED
    def _choose_base(self, seed, base):
        """Pick a base further up the delta chain, steered by seed.

        While the two low bits of seed are both set, step from base to
        its own base and shift seed right by two bits.  Stops early and
        returns _NO_RECORD if base is _NO_RECORD, or returns base itself
        once base is stored as a full text.  The only call, in add(), is
        commented out.
        """
        while (seed & 3) == 3:
            if base == _NO_RECORD:
                return _NO_RECORD
            idxrec = self[base]
            if idxrec[I_BASE] == _NO_RECORD:
                return base

            base = idxrec[I_BASE]
            seed >>= 2

        return base     # relative to this full text

    def _add_delta(self, text, text_sha, base, compress):
        """Add a text stored relative to a previous text.

        Falls back to storing the full text when the chain below base is
        already CHAIN_LIMIT deltas long, or when the delta is not smaller
        than the text itself.  Raises RevfileError if base is not an
        existing record.
        """
        self._check_index(base)

        try:
            base_text = self.get(base, CHAIN_LIMIT)
        except LimitHitException:
            return self._add_full_text(text, text_sha, compress)

        data = mdiff.bdiff(base_text, text)

        # If the delta is larger than the text, we might as well just
        # store the text.  (OK, the delta might be more compressible,
        # but the overhead of applying it probably still makes it
        # bad, and I don't want to compress both of them to find out.)
        if len(data) >= len(text):
            return self._add_full_text(text, text_sha, compress)
        return self._add_compressed(text_sha, data, base, compress)

    def add(self, text, base=_NO_RECORD, compress=True):
        """Add a new text to the revfile.

        If the text is already present then its existing id is
        returned and the file is not changed.

        If compress is true then gzip compression will be used if it
        reduces the size.

        If a base index is specified, that text *may* be used for
        delta compression of the new text.  Delta compression will
        only be used if it would be a size win and if the existing
        base is not at too long of a delta chain already.

        Raises RevfileError if the revfile is open read-only or if base
        is not an existing record.
        """
        self._check_write()

        text_sha = sha.new(text).digest()

        idx = self.find_sha(text_sha)
        if idx != _NO_RECORD:
            # TODO: Optional paranoid mode where we read out that record
            # and make sure it's the same, in case someone ever breaks
            # SHA-1.
            return idx          # already present

        # base = self._choose_base(ord(text_sha[0]), base)

        if base == _NO_RECORD:
            return self._add_full_text(text, text_sha, compress)
        return self._add_delta(text, text_sha, base, compress)

    def get(self, idx, recursion_limit=None):
        """Retrieve text of a previous revision.

        If recursion_limit is an integer then walk back at most that
        many revisions and then raise LimitHitException, indicating
        that we ought to record a new file text instead of another
        delta.  Don't use this when trying to get out an existing
        revision.

        Raises RevfileError if the reconstructed text does not hash to
        the SHA-1 stored in the index record.
        """
        idxrec = self[idx]
        if idxrec[I_BASE] == _NO_RECORD:
            text = self._get_full_text(idx, idxrec)
        else:
            text = self._get_patched(idx, idxrec, recursion_limit)

        if sha.new(text).digest() != idxrec[I_SHA]:
            raise RevfileError("corrupt SHA-1 digest on record %d"
                               % idx)

        return text

    def _get_raw(self, idx, idxrec):
        """Read the stored bytes for a record, decompressing if flagged.

        Raises RevfileError if the record carries flags other than
        FL_GZIP or if the data file is shorter than the record claims.
        """
        flags = idxrec[I_FLAGS]
        if flags & ~FL_GZIP:
            raise RevfileError("unsupported index flags %#x on index %d"
                               % (flags, idx))

        l = idxrec[I_LEN]
        if l == 0:
            return ''

        self.datafile.seek(idxrec[I_OFFSET])

        data = self.datafile.read(l)
        if len(data) != l:
            raise RevfileError("short read %d of %d "
                               "getting text for record %d in %r"
                               % (len(data), l, idx, self.basename))

        if flags & FL_GZIP:
            data = zlib.decompress(data)

        return data

    def _get_full_text(self, idx, idxrec):
        """Return the text of a record that is stored as a full text."""
        assert idxrec[I_BASE] == _NO_RECORD

        return self._get_raw(idx, idxrec)

    def _get_patched(self, idx, idxrec, recursion_limit):
        """Return the text of a record that is stored as a delta.

        The base text is fetched through get() with the limit reduced
        by one.  LimitHitException is raised once the limit would go
        below zero; a limit of None means no limit.
        """
        base = idxrec[I_BASE]
        assert base >= 0
        assert base < idx       # no loops!

        if recursion_limit is None:
            sub_limit = None
        else:
            sub_limit = recursion_limit - 1
            if sub_limit < 0:
                raise LimitHitException()

        base_text = self.get(base, sub_limit)
        patch = self._get_raw(idx, idxrec)

        return mdiff.bpatch(base_text, patch)

    def __len__(self):
        """Return number of revisions.

        Derived from the size of the index file, less the header record.
        Raises RevfileError if that size is not a whole number of
        records or is too small to hold the header.
        """
        l = os.fstat(self.idxfile.fileno())[stat.ST_SIZE]
        if l % _RECORDSIZE:
            raise RevfileError("bad length %d on index of %r" % (l, self.basename))
        if l < _RECORDSIZE:
            raise RevfileError("no header present in index of %r" % (self.basename))
        return l // _RECORDSIZE - 1

    def __getitem__(self, idx):
        """Index by sequence id returns the index field.

        Raises IndexError if idx is past the last record and
        RevfileError if idx is negative.
        """
        ## TODO: Can avoid seek if we just moved there...
        self._seek_index(idx)
        idxrec = self._read_next_index()
        if idxrec is None:
            raise IndexError("no index %d" % idx)
        return idxrec

    def _seek_index(self, idx):
        """Position the index file at the start of record idx."""
        if idx < 0:
            raise RevfileError("invalid index %r" % idx)
        # + 1 skips the header, which occupies the first record slot
        self.idxfile.seek((idx + 1) * _RECORDSIZE)

    def __iter__(self):
        """Read back all index records.

        Do not seek the index file while this is underway!"""
        self._seek_index(0)
        while True:
            idxrec = self._read_next_index()
            if idxrec is None:
                break
            yield idxrec

    def _read_next_index(self):
        """Read and unpack the record at the current index file position.

        Returns None at end of file.  Raises RevfileError if only part
        of a record could be read.
        """
        rec = self.idxfile.read(_RECORDSIZE)
        if not rec:
            return None
        if len(rec) != _RECORDSIZE:
            # Work out which record was cut short from where the read
            # started; only done on this error path.
            start = self.idxfile.tell() - len(rec)
            idx = start // _RECORDSIZE - 1
            raise RevfileError("short read of %d bytes getting index %d from %r"
                               % (len(rec), idx, self.basename))

        return struct.unpack(">20sIIII12x", rec)

    def dump(self, f=sys.stdout):
        """Write a table of all index records to the file object f."""
        f.write('%-8s %-40s %-8s %-8s %-8s %-8s\n'
                % tuple('idx sha1 base flags offset len'.split()))
        f.write('-------- ---------------------------------------- ')
        f.write('-------- -------- -------- --------\n')

        for i, rec in enumerate(self):
            f.write("#%-7d %40s " % (i, hexlify(rec[I_SHA])))
            if rec[I_BASE] == _NO_RECORD:
                f.write("(none) ")
            else:
                f.write("#%-7d " % rec[I_BASE])

            f.write("%8x %8d %8d\n"
                    % (rec[I_FLAGS], rec[I_OFFSET], rec[I_LEN]))

    def total_text_size(self):
        """Return the sum of sizes of all file texts.

        This is how much space they would occupy if they were stored without
        delta and gzip compression.

        As a side effect this completely validates the Revfile, checking that all
        texts can be reproduced with the correct SHA-1."""
        t = 0
        # Walk by index rather than iterating self: get() seeks the
        # index file, which must not happen during iteration.
        for idx in range(len(self)):
            t += len(self.get(idx))
        return t

473

474

475

476

def main(argv):
    """Run one revfile command taken from an argument vector.

    argv -- full argument list; argv[1] is the command and argv[2] the
        revfile base name, with further arguments depending on the
        command.

    Results are written to standard output and diagnostics to standard
    error.  Returns 1 when the arguments are unusable or the requested
    record does not exist; returns None on success.
    """
    try:
        cmd = argv[1]
        filename = argv[2]
    except IndexError:
        sys.stderr.write("usage: revfile dump REVFILE\n"
                         " revfile add REVFILE < INPUT\n"
                         " revfile add-delta REVFILE BASE < INPUT\n"
                         " revfile add-series REVFILE BASE FILE...\n"
                         " revfile get REVFILE IDX\n"
                         " revfile find-sha REVFILE HEX\n"
                         " revfile total-text-size REVFILE\n"
                         " revfile last REVFILE\n")
        return 1

    def rw():
        return Revfile(filename, 'w')

    def ro():
        return Revfile(filename, 'r')

    def emit(value):
        # One value per line on stdout.
        sys.stdout.write("%s\n" % (value,))

    def int_arg(position, usage_line):
        # Integer at argv[position]; None, after printing usage_line,
        # if it is missing or not a number.
        try:
            return int(argv[position])
        except (IndexError, ValueError):
            sys.stderr.write(usage_line)
            return None

    if cmd == 'add':
        emit(rw().add(sys.stdin.read()))
    elif cmd == 'add-delta':
        # Validate BASE before opening, so that a bad command line does
        # not create an empty revfile as a side effect.
        base = int_arg(3, "usage: revfile add-delta REVFILE BASE < INPUT\n")
        if base is None:
            return 1
        emit(rw().add(sys.stdin.read(), base))
    elif cmd == 'add-series':
        rev = int_arg(3, "usage: revfile add-series REVFILE BASE FILE...\n")
        if rev is None:
            return 1
        r = rw()
        for fn in argv[4:]:
            # Prints the base each text is about to be added against.
            emit(rev)
            infile = open(fn)
            try:
                text = infile.read()
            finally:
                infile.close()
            rev = r.add(text, rev)
    elif cmd == 'dump':
        ro().dump()
    elif cmd == 'get':
        idx = int_arg(3, "usage: revfile get FILE IDX\n")
        if idx is None:
            return 1

        r = ro()

        if idx < 0 or idx >= len(r):
            sys.stderr.write("invalid index %r\n" % idx)
            return 1

        sys.stdout.write(r.get(idx))
    elif cmd == 'find-sha':
        try:
            s = unhexlify(argv[3])
        except (IndexError, TypeError, ValueError):
            # unhexlify rejects odd-length or non-hex input
            sys.stderr.write("usage: revfile find-sha FILE HEX\n")
            return 1
        if len(s) != 20:
            sys.stderr.write("SHA-1 must be given as 40 hex digits\n")
            return 1

        idx = ro().find_sha(s)
        if idx == _NO_RECORD:
            sys.stderr.write("no such record\n")
            return 1
        emit(idx)
    elif cmd == 'total-text-size':
        emit(ro().total_text_size())
    elif cmd == 'last':
        emit(len(ro()) - 1)
    else:
        sys.stderr.write("unknown command %r\n" % cmd)
        return 1

543

544

545

if __name__ == '__main__':
    import sys
    # main() returns None on success; turn that into exit status 0.
    status = main(sys.argv)
    sys.exit(status or 0)

Older »