/brz/remove-bazaar : revision 0.211.35

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to dulwich/pack.py

Committer: Jelmer Vernooij
Date: 2008-12-11 10:52:43 UTC
mto: (0.215.1 trunk)
mto: This revision was merged to the branch mainline in revision 6960.
Revision ID: jelmer@samba.org-20081211105243-dokx6i1dofwnlrrm

Add simple pack dump utility.

files added:

.bzrignore

COPYING

Makefile

README

bin/dumppack

dulwich

dulwich/__init__.py

dulwich/commit.py

dulwich/errors.py

dulwich/objects.py

dulwich/pack.py

dulwich/repo.py

dulwich/tests

dulwich/tests/__init__.py

dulwich/tests/data

dulwich/tests/data/blobs

dulwich/tests/data/blobs/6f670c0fb53f9463760b7295fbb814e965fb20c8

dulwich/tests/data/blobs/954a536f7819d40e6f637f849ee187dd10066349

dulwich/tests/data/blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391

dulwich/tests/data/commits

dulwich/tests/data/commits/0d89f20333fbb1d2f3a94da77f4981373d8f4310

dulwich/tests/data/commits/5dac377bdded4c9aeb8dff595f0faeebcc8498cc

dulwich/tests/data/commits/60dacdc733de308bb77bb76ce0fb0f9b44c9769e

dulwich/tests/data/packs

dulwich/tests/data/packs/pack-bc63ddad95e7321ee734ea11a7a62d314e0d7481.idx

dulwich/tests/data/packs/pack-bc63ddad95e7321ee734ea11a7a62d314e0d7481.pack

dulwich/tests/data/repos

dulwich/tests/data/repos/a

dulwich/tests/data/repos/a/.git

dulwich/tests/data/repos/a/.git/HEAD

dulwich/tests/data/repos/a/.git/index

dulwich/tests/data/repos/a/.git/objects

dulwich/tests/data/repos/a/.git/objects/2a

dulwich/tests/data/repos/a/.git/objects/2a/72d929692c41d8554c07f6301757ba18a65d91

dulwich/tests/data/repos/a/.git/objects/4e

dulwich/tests/data/repos/a/.git/objects/4e/f30bbfe26431a69c3820d3a683df54d688f2ec

dulwich/tests/data/repos/a/.git/objects/4f

dulwich/tests/data/repos/a/.git/objects/4f/2e6529203aa6d44b5af6e3292c837ceda003f9

dulwich/tests/data/repos/a/.git/objects/7d

dulwich/tests/data/repos/a/.git/objects/7d/9a07d797595ef11344549b8d08198e48c15364

dulwich/tests/data/repos/a/.git/objects/a2

dulwich/tests/data/repos/a/.git/objects/a2/96d0bb611188cabb256919f36bc30117cca005

dulwich/tests/data/repos/a/.git/objects/a9

dulwich/tests/data/repos/a/.git/objects/a9/0fa2d900a17e99b433217e988c4eb4a2e9a097

dulwich/tests/data/repos/a/.git/objects/ff

dulwich/tests/data/repos/a/.git/objects/ff/d47d45845a8f6576491e1edb97e3fe6a850e7f

dulwich/tests/data/repos/a/.git/objects/info

dulwich/tests/data/repos/a/.git/objects/pack

dulwich/tests/data/repos/a/.git/refs

dulwich/tests/data/repos/a/.git/refs/heads

dulwich/tests/data/repos/a/.git/refs/heads/master

dulwich/tests/data/repos/a/.git/refs/tags

dulwich/tests/data/repos/a/a

dulwich/tests/data/repos/a/b

dulwich/tests/data/repos/a/c

dulwich/tests/data/repos/ooo_merge

dulwich/tests/data/repos/ooo_merge/.git

dulwich/tests/data/repos/ooo_merge/.git/HEAD

dulwich/tests/data/repos/ooo_merge/.git/index

dulwich/tests/data/repos/ooo_merge/.git/objects

dulwich/tests/data/repos/ooo_merge/.git/objects/29

dulwich/tests/data/repos/ooo_merge/.git/objects/29/69be3e8ee1c0222396a5611407e4769f14e54b

dulwich/tests/data/repos/ooo_merge/.git/objects/38

dulwich/tests/data/repos/ooo_merge/.git/objects/38/74e9c60a6d149c44c928140f250d81e6381520

dulwich/tests/data/repos/ooo_merge/.git/objects/6f

dulwich/tests/data/repos/ooo_merge/.git/objects/6f/670c0fb53f9463760b7295fbb814e965fb20c8

dulwich/tests/data/repos/ooo_merge/.git/objects/70

dulwich/tests/data/repos/ooo_merge/.git/objects/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6

dulwich/tests/data/repos/ooo_merge/.git/objects/76

dulwich/tests/data/repos/ooo_merge/.git/objects/76/01d7f6231db6a57f7bbb79ee52e4d462fd44d1

dulwich/tests/data/repos/ooo_merge/.git/objects/90

dulwich/tests/data/repos/ooo_merge/.git/objects/90/182552c4a85a45ec2a835cadc3451bebdfe870

dulwich/tests/data/repos/ooo_merge/.git/objects/95

dulwich/tests/data/repos/ooo_merge/.git/objects/95/4a536f7819d40e6f637f849ee187dd10066349

dulwich/tests/data/repos/ooo_merge/.git/objects/b2

dulwich/tests/data/repos/ooo_merge/.git/objects/b2/a2766a2879c209ab1176e7e778b81ae422eeaa

dulwich/tests/data/repos/ooo_merge/.git/objects/f5

dulwich/tests/data/repos/ooo_merge/.git/objects/f5/07291b64138b875c28e03469025b1ea20bc614

dulwich/tests/data/repos/ooo_merge/.git/objects/f9

dulwich/tests/data/repos/ooo_merge/.git/objects/f9/e39b120c68182a4ba35349f832d0e4e61f485c

dulwich/tests/data/repos/ooo_merge/.git/objects/fb

dulwich/tests/data/repos/ooo_merge/.git/objects/fb/5b0425c7ce46959bec94d54b9a157645e114f5

dulwich/tests/data/repos/ooo_merge/.git/objects/info

dulwich/tests/data/repos/ooo_merge/.git/objects/pack

dulwich/tests/data/repos/ooo_merge/.git/refs

dulwich/tests/data/repos/ooo_merge/.git/refs/heads

dulwich/tests/data/repos/ooo_merge/.git/refs/heads/master

dulwich/tests/data/repos/ooo_merge/.git/refs/tags

dulwich/tests/data/repos/ooo_merge/a

dulwich/tests/data/repos/ooo_merge/b

dulwich/tests/data/repos/ooo_merge/c

dulwich/tests/data/repos/simple_merge

dulwich/tests/data/repos/simple_merge/.git

dulwich/tests/data/repos/simple_merge/.git/HEAD

dulwich/tests/data/repos/simple_merge/.git/index

dulwich/tests/data/repos/simple_merge/.git/objects

dulwich/tests/data/repos/simple_merge/.git/objects/0d

dulwich/tests/data/repos/simple_merge/.git/objects/0d/89f20333fbb1d2f3a94da77f4981373d8f4310

dulwich/tests/data/repos/simple_merge/.git/objects/1b

dulwich/tests/data/repos/simple_merge/.git/objects/1b/6318f651a534b38f9c7aedeebbd56c1e896853

dulwich/tests/data/repos/simple_merge/.git/objects/29

dulwich/tests/data/repos/simple_merge/.git/objects/29/69be3e8ee1c0222396a5611407e4769f14e54b

dulwich/tests/data/repos/simple_merge/.git/objects/4c

dulwich/tests/data/repos/simple_merge/.git/objects/4c/ffe90e0a41ad3f5190079d7c8f036bde29cbe6

dulwich/tests/data/repos/simple_merge/.git/objects/5d

dulwich/tests/data/repos/simple_merge/.git/objects/5d/ac377bdded4c9aeb8dff595f0faeebcc8498cc

dulwich/tests/data/repos/simple_merge/.git/objects/60

dulwich/tests/data/repos/simple_merge/.git/objects/60/dacdc733de308bb77bb76ce0fb0f9b44c9769e

dulwich/tests/data/repos/simple_merge/.git/objects/6f

dulwich/tests/data/repos/simple_merge/.git/objects/6f/670c0fb53f9463760b7295fbb814e965fb20c8

dulwich/tests/data/repos/simple_merge/.git/objects/70

dulwich/tests/data/repos/simple_merge/.git/objects/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6

dulwich/tests/data/repos/simple_merge/.git/objects/90

dulwich/tests/data/repos/simple_merge/.git/objects/90/182552c4a85a45ec2a835cadc3451bebdfe870

dulwich/tests/data/repos/simple_merge/.git/objects/95

dulwich/tests/data/repos/simple_merge/.git/objects/95/4a536f7819d40e6f637f849ee187dd10066349

dulwich/tests/data/repos/simple_merge/.git/objects/ab

dulwich/tests/data/repos/simple_merge/.git/objects/ab/64bbdcc51b170d21588e5c5d391ee5c0c96dfd

dulwich/tests/data/repos/simple_merge/.git/objects/d4

dulwich/tests/data/repos/simple_merge/.git/objects/d4/bdad6549dfedf25d3b89d21f506aff575b28a7

dulwich/tests/data/repos/simple_merge/.git/objects/d8

dulwich/tests/data/repos/simple_merge/.git/objects/d8/0c186a03f423a81b39df39dc87fd269736ca86

dulwich/tests/data/repos/simple_merge/.git/objects/e6

dulwich/tests/data/repos/simple_merge/.git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391

dulwich/tests/data/repos/simple_merge/.git/objects/info

dulwich/tests/data/repos/simple_merge/.git/objects/pack

dulwich/tests/data/repos/simple_merge/.git/refs

dulwich/tests/data/repos/simple_merge/.git/refs/heads

dulwich/tests/data/repos/simple_merge/.git/refs/heads/master

dulwich/tests/data/repos/simple_merge/.git/refs/tags

dulwich/tests/data/repos/simple_merge/a

dulwich/tests/data/repos/simple_merge/b

dulwich/tests/data/repos/simple_merge/d

dulwich/tests/data/repos/simple_merge/e

dulwich/tests/data/trees

dulwich/tests/data/trees/70c190eb48fa8bbb50ddc692a17b44cb781af7f6

dulwich/tests/test_objects.py

dulwich/tests/test_pack.py

dulwich/tests/test_repository.py

setup.py

files removed:

.bzrignore

.rsyncexclude

NEWS

README

TODO

build-api

bzr-man.py

bzrlib

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/changeset.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/mdiff.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_core.py

bzrlib/meta_store.py

bzrlib/newinventory.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/progress.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/revision.py

bzrlib/selftest

bzrlib/selftest/__init__.py

bzrlib/selftest/blackbox.py

bzrlib/selftest/plugins.py

bzrlib/selftest/testbranch.py

bzrlib/selftest/testdiff.py

bzrlib/selftest/testhashcache.py

bzrlib/selftest/testinv.py

bzrlib/selftest/testlog.py

bzrlib/selftest/testmerge3.py

bzrlib/selftest/testrevision.py

bzrlib/selftest/testrevisionnamespaces.py

bzrlib/selftest/teststatus.py

bzrlib/selftest/versioning.py

bzrlib/selftest/whitebox.py

bzrlib/status.py

bzrlib/store.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/upgrade.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/split-join-files.txt

doc/supportability.txt

doc/svk.txt

doc/switch-in-branch.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

effbot

effbot/__init__.py

effbot/org

effbot/org/__init__.py

effbot/org/gzip_consumer.py

effbot/org/http_client.py

effbot/org/http_manager.py

elementtree

elementtree/ElementTree.py

elementtree/__init__.py

notes

notes/new-inventory-sample.xml

notes/performance.txt

patches

patches/annotate3.patch

patches/annotate4.patch

patches/cache-remote-revisions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

patches/pending-merge.patch

patches/plugins-no-plugins.patch

patches/progress.diff

patches/symlink-support.patch

plugins

plugins/changeset

plugins/changeset/__init__.py

plugins/changeset/apply_changeset.py

plugins/changeset/common.py

plugins/changeset/gen_changeset.py

plugins/changeset/read_changeset.py

plugins/checkperms

setup.py

testbzr

testsweet.py

tools

tools/convertfile.py

tools/convertinv.py

tools/testweave.py

tools/weavebench.py

tools/weavemerge.sh

tutorial.txt

urlgrabber

urlgrabber/__init__.py

urlgrabber/byterange.py

urlgrabber/grabber.py

urlgrabber/keepalive.py

urlgrabber/mirror.py

urlgrabber/progress.py

Show diffs side-by-side

added added

removed removed

dulwich/pack.py

# pack.py -- For dealing with packed git objects.

# The code is loosely based on that in the sha1_file.c file from git itself,

# which is Copyright (C) Linus Torvalds, 2005 and distributed under the

# GPL version 2.

# This program is free software; you can redistribute it and/or

# modify it under the terms of the GNU General Public License

# as published by the Free Software Foundation; version 2

# of the License.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,

# MA 02110-1301, USA.

"""Classes for dealing with packed git objects.

A pack is a compact representation of a bunch of objects, stored

using deltas where possible.

They have two parts, the pack file, which stores the data, and an index

that tells you where the data is.

To find an object you look in all of the index files 'til you find a

match for the object name. You then use the pointer got from this as

a pointer in to the corresponding packfile.

"""

from collections import defaultdict

import hashlib

import mmap

import os

import struct

import sys

# True when the running interpreter is Python 2.6 or newer; simple_mmap()
# uses this to decide whether it may pass the offset argument to mmap().
supports_mmap_offset = sys.version_info[:2] >= (2, 6)

from objects import (ShaFile,

_decompress,

)

def hex_to_sha(hex):
    """Convert a string of hex digits into the string it encodes.

    :param hex: Hexadecimal digits; every pair of digits becomes one
        character of the result (a trailing single digit is converted on
        its own).
    :return: String with one character per pair of hex digits.
    """
    # Build the pieces first and join once instead of growing a string
    # with += on every iteration.
    return "".join([chr(int(hex[i:i+2], 16)) for i in range(0, len(hex), 2)])

def sha_to_hex(sha):
    """Convert a string into its lowercase hexadecimal representation.

    :param sha: String whose characters are each rendered as two hex digits.
    :return: String of hex digits, two per input character.
    """
    # Join once rather than concatenating inside the loop.
    return "".join(["%02x" % ord(c) for c in sha])

# Largest offset+size (256 MiB) that simple_mmap() will map when mmap() has
# no offset support and the file must be mapped from its beginning.
MAX_MMAP_SIZE = 256 * 1024 * 1024

def simple_mmap(f, offset, size, access=mmap.ACCESS_READ):
    """Map ``size`` bytes of an open file, starting at byte ``offset``.

    :param f: Open file object; its ``fileno()`` is mapped and its ``name``
        (when present) is used in the error message.
    :param offset: Position in the file that index 0 of the result refers to.
    :param size: Number of bytes to expose from ``offset`` on.
    :param access: mmap access mode, read-only by default.
    :return: The mmap object itself when nothing has to be skipped, otherwise
        a wrapper whose index 0 is the byte at ``offset``.
    :raise AssertionError: If mmap() has no offset support and
        ``offset + size`` exceeds MAX_MMAP_SIZE.
    """
    class ArraySkipper(object):
        """Sequence view of ``array`` that hides its first ``offset`` items."""

        def __init__(self, array, offset):
            self.array = array
            self.offset = offset

        def __getslice__(self, i, j):
            # Python 2 slice protocol. An open-ended slice arrives with a
            # huge j; clamp it so that adding the offset cannot overflow.
            return self.array[i+self.offset:min(j, len(self))+self.offset]

        def __getitem__(self, i):
            if isinstance(i, slice):
                # Slice objects are what Python 3 (and extended slices on
                # Python 2) pass here; resolve them against the visible
                # length before shifting.
                start, stop, step = i.indices(len(self))
                return self.array[start+self.offset:stop+self.offset:step]
            if i < 0:
                # Count negative indexes from the end of the visible part,
                # never into the hidden prefix.
                i += len(self)
                if i < 0:
                    raise IndexError("index out of range")
            return self.array[i+self.offset]

        def __len__(self):
            return len(self.array) - self.offset

        def __str__(self):
            return str(self.array[self.offset:])

    if supports_mmap_offset:
        # mmap() only accepts offsets that are a multiple of the allocation
        # granularity, so map from the preceding boundary and skip the rest.
        skip = offset % mmap.ALLOCATIONGRANULARITY
        mem = mmap.mmap(f.fileno(), size+skip, access=access,
                        offset=offset-skip)
    else:
        if offset+size > MAX_MMAP_SIZE:
            raise AssertionError("%s is larger than 256 meg, and this version "
                "of Python does not support the offset argument to mmap()."
                % getattr(f, "name", f))
        # Without offset support the file is mapped from its start.
        skip = offset
        mem = mmap.mmap(f.fileno(), size+offset, access=access)
    if skip == 0:
        return mem
    return ArraySkipper(mem, skip)

def multi_ord(map, start, count):
    """Read ``count`` characters of ``map`` as one big-endian integer.

    :param map: Indexable sequence of single characters.
    :param start: Index of the most significant character.
    :param count: Number of characters to combine.
    :return: The combined integer (0 when ``count`` is 0).
    """
    result = 0
    for position in range(start, start + count):
        # Shift the digits gathered so far up by one byte, then append.
        result = (result << 8) + ord(map[position])
    return result

100

101

102

class PackIndex(object):
    """An index in to a packfile.

    Given a sha id of an object a pack index can tell you the location in the
    packfile of that object if it has it.

    To do the lookup it maps the file and indexes the first 256 four-byte
    groups (the fan-out table) with the first byte of the sha id. The value
    found there is the end of the group of entries sharing that first byte;
    the value for the preceding byte (or 0 for byte 0) is the start of the
    group. The names are sorted within the group, so the range is bisected to
    find out whether the value is present.
    """

    # Size of the fan-out table: 256 entries of 4 bytes each.
    PACK_INDEX_HEADER_SIZE = 0x100 * 4
    sha_bytes = 20
    # A version 1 record is a 4-byte offset followed by the object name.
    record_size = sha_bytes + 4

    def __init__(self, filename):
        """Create a pack index object.

        Provide it with the name of the index file to consider; the file is
        opened and mapped immediately.

        :param filename: Path of the index file.
        :raise AssertionError: If the file does not exist or has an
            unsupported version.
        """
        self._filename = filename
        assert os.path.exists(filename), "%s is not a pack index" % filename
        # Take the size now, so it can be checked on each lookup to ensure
        # that it hasn't changed.
        self._size = os.path.getsize(filename)
        # The index is binary data, so open it in binary mode.
        self._file = open(filename, 'rb')
        self._contents = simple_mmap(self._file, 0, self._size)
        if self._contents[:4] != '\377tOc':
            # No magic header: version 1, the fan-out table comes first.
            self.version = 1
            self._fan_out_table = self._read_fan_out_table(0)
        else:
            (self.version, ) = struct.unpack_from(">L", self._contents, 4)
            assert self.version in (2,), "Version was %d" % self.version
            self._fan_out_table = self._read_fan_out_table(8)
            # Version 2 stores names, CRC32s and offsets in separate tables.
            self._name_table_offset = 8 + 0x100 * 4
            self._crc32_table_offset = self._name_table_offset + 20 * len(self)
            self._pack_offset_table_offset = (self._crc32_table_offset +
                                              4 * len(self))

    def close(self):
        """Close the underlying index file."""
        self._file.close()

    def __len__(self):
        """Return the number of entries in this pack index."""
        return self._fan_out_table[-1]

    def _unpack_entry(self, i):
        """Unpack the i-th entry in the index file.

        :return: Tuple with object name (SHA), offset in pack file and
            CRC32 checksum (None for version 1 indexes).
        """
        if self.version == 1:
            (offset, name) = struct.unpack_from(">L20s", self._contents,
                self.PACK_INDEX_HEADER_SIZE + (i * self.record_size))
            return (name, offset, None)
        return (self._unpack_name(i), self._unpack_offset(i),
                self._unpack_crc32_checksum(i))

    def _unpack_name(self, i):
        """Return the object name stored in the i-th entry."""
        if self.version == 1:
            return self._unpack_entry(i)[0]
        return struct.unpack_from("20s", self._contents,
                                  self._name_table_offset + i * 20)[0]

    def _unpack_offset(self, i):
        """Return the pack file offset stored in the i-th entry."""
        if self.version == 1:
            return self._unpack_entry(i)[1]
        offset = struct.unpack_from(">L", self._contents,
            self._pack_offset_table_offset + i * 4)[0]
        if offset & 0x80000000:
            # Most significant bit set: the remaining 31 bits index the
            # table of 8-byte offsets that follows the 4-byte offset table.
            large_table = self._pack_offset_table_offset + 4 * len(self)
            offset = struct.unpack_from(">Q", self._contents,
                large_table + 8 * (offset & 0x7fffffff))[0]
        return offset

    def _unpack_crc32_checksum(self, i):
        """Return the CRC32 of the i-th entry, or None for version 1."""
        if self.version == 1:
            return None
        return struct.unpack_from(">L", self._contents,
                                  self._crc32_table_offset + i * 4)[0]

    def iterentries(self):
        """Iterate over the entries in this pack index.

        Will yield tuples with object name, offset in packfile and crc32
        checksum.
        """
        for i in range(len(self)):
            yield self._unpack_entry(i)

    def _read_fan_out_table(self, start_offset):
        """Read the 256 big-endian counters starting at ``start_offset``.

        :return: List of 256 integers.
        """
        return list(struct.unpack_from(">256L", self._contents, start_offset))

    def check(self):
        """Check that the stored checksum matches the actual checksum."""
        return self.calculate_checksum() == self.get_stored_checksums()[1]

    def calculate_checksum(self):
        """Return the SHA1 digest of the index minus its last 20 bytes."""
        # The mapped contents are all that is needed; there is no reason to
        # open the file a second time.
        return hashlib.sha1(self._contents[:-20]).digest()

    def get_stored_checksums(self):
        """Return the SHA1 checksums stored for the corresponding packfile and
        this index file itself."""
        return str(self._contents[-40:-20]), str(self._contents[-20:])

    def object_index(self, sha):
        """Return the index in to the corresponding packfile for the object.

        Given the hex name of an object it will return the offset that object
        lives at within the corresponding pack file. If the pack file doesn't
        have the object then None will be returned.

        :raise AssertionError: If the index file changed size since it was
            opened.
        """
        size = os.path.getsize(self._filename)
        assert size == self._size, "Pack index %s has changed size, I don't " \
            "like that" % self._filename
        return self._object_index(sha)

    def _object_index(self, hexsha):
        """See object_index"""
        sha = hex_to_sha(hexsha)
        first = ord(sha[0])
        # Entries whose first byte is ``first`` occupy [start, end). There is
        # no fan-out slot before byte 0, so that group starts at entry 0.
        if first == 0:
            start = 0
        else:
            start = self._fan_out_table[first-1]
        end = self._fan_out_table[first]
        while start < end:
            i = (start + end) // 2
            file_sha = self._unpack_name(i)
            if file_sha == sha:
                return self._unpack_offset(i)
            elif file_sha < sha:
                start = i + 1
            else:
                # ``end`` is exclusive, so entry i-1 stays in the range.
                end = i
        return None

242

243

244

class PackData(object):
    """The data contained in a packfile.

    Pack files can be accessed both sequentially for exploding a pack, and
    directly with the help of an index to retrieve a specific object.

    The objects within are either complete or a delta against another.

    Each object starts with a variable length header. While the most
    significant bit of a byte is set, the following byte is still part of the
    header. In the first byte the next three bits are the object type and the
    low four bits are the lowest bits of the size. In every further byte the
    low seven bits are the next more significant bits of the size.

    Complete objects are stored as zlib deflated data. The size in the header
    is the uncompressed size, so the deflated length is unknown; the whole
    remainder of the file from the start of the deflated data is handed to
    the decompressor.

    Currently there are no integrity checks done. Also no attempt is made to
    try and detect the delta case, or a request for an object at the wrong
    position.
    """

    def __init__(self, filename):
        """Create a PackData object that represents the pack in the given
        filename.

        The file must exist and stay readable until the object is disposed
        of. It must also stay the same size. It will be mapped whenever
        needed.

        :param filename: Path of the pack file.
        :raise AssertionError: If the file does not exist or its header is
            not a supported pack header.
        """
        self._filename = filename
        assert os.path.exists(filename), "%s is not a packfile" % filename
        self._size = os.path.getsize(filename)
        self._read_header()

    def _read_header(self):
        """Read the object count from the header and the trailing checksum."""
        pack = open(self._filename, 'rb')
        try:
            header = pack.read(12)
            # The last 20 bytes of the file hold the stored checksum.
            pack.seek(self._size-20)
            self._stored_checksum = pack.read(20)
        finally:
            pack.close()
        assert header[:4] == "PACK"
        version = struct.unpack_from(">L", header, 4)[0]
        assert version in (2, 3), "Version was %d" % version
        self._num_objects = struct.unpack_from(">L", header, 8)[0]

    def __len__(self):
        """Returns the number of objects in this pack."""
        return self._num_objects

    def calculate_checksum(self):
        """Return the SHA1 digest of the pack minus its last 20 bytes."""
        pack = open(self._filename, 'rb')
        try:
            contents = simple_mmap(pack, 0, self._size)
            return hashlib.sha1(contents[:-20]).digest()
        finally:
            pack.close()

    def check(self):
        """Check that the stored checksum matches the actual checksum."""
        actual = self.calculate_checksum()
        return (actual == self._stored_checksum)

    def get_object_at(self, offset):
        """Given an offset in to the packfile return the object that is there.

        Using the associated index the location of an object can be looked
        up, and then the packfile can be asked directly for that object using
        this function.

        Currently only non-delta objects are supported.

        :raise AssertionError: If ``offset`` is not an integer or the pack
            file changed size since it was opened.
        """
        assert isinstance(offset, long) or isinstance(offset, int)
        size = os.path.getsize(self._filename)
        assert size == self._size, "Pack data %s has changed size, I don't " \
            "like that" % self._filename
        pack = open(self._filename, 'rb')
        try:
            return self._get_object_at(simple_mmap(pack, offset, size-offset))
        finally:
            pack.close()

    def _get_object_at(self, map):
        """Decode the object whose header starts at index 0 of ``map``."""
        head = ord(map[0])
        obj_type = (head >> 4) & 0x07
        size = head & 0x0f
        consumed = 1    # header bytes read so far
        shift = 4       # bit position of the next chunk of the size
        more = head & 0x80
        while more:
            byte = ord(map[consumed])
            size += (byte & 0x7f) << shift
            shift += 7
            consumed += 1
            more = byte & 0x80
        # The size is the inflated size, so the deflated length is unknown;
        # hand over everything that follows the header.
        uncomp = _decompress(map[consumed:])
        return ShaFile.from_raw_string(obj_type, uncomp)

354

355

356

class SHA1Writer(object):
    """File wrapper that appends the SHA1 of everything written on close."""

    def __init__(self, f):
        """Wrap ``f``, an object with ``write`` and ``close`` methods."""
        self.f = f
        # Start from an empty hash. Passing no argument is equivalent to
        # hashing "" and also works where str is not a byte string.
        self.sha1 = hashlib.sha1()

    def write(self, data):
        """Write ``data`` to the wrapped file and add it to the running hash."""
        self.sha1.update(data)
        self.f.write(data)

    def close(self):
        """Append the digest of all data written so far and close the file.

        :return: The 20-byte SHA1 digest that was appended.
        """
        sha = self.sha1.digest()
        assert len(sha) == 20
        self.f.write(sha)
        self.f.close()
        return sha

372

373

374

def write_pack(filename, objects):
    """Write a new pack file.

    Only the pack header and the trailing checksum are written so far; the
    objects themselves are not yet stored (see the FIXME below).

    :param filename: The filename of the new pack file.
    :param objects: List of objects to write.
    :return: List with (name, offset, crc32 checksum) entries, pack checksum
    """
    entries = []
    # A pack is binary data, so the file must not be opened in text mode.
    f = SHA1Writer(open(filename, 'wb'))
    f.write("PACK")               # Pack header
    f.write(struct.pack(">L", 2)) # Pack version
    f.write(struct.pack(">L", len(objects))) # Number of objects in pack
    for o in objects:
        pass # FIXME: Write object
    return entries, f.close()

390

391

392

def write_pack_index_v1(filename, entries, pack_checksum):
    """Write a new version 1 pack index file.

    The file consists of the fan-out table, one (offset, name) record per
    entry in name order, the pack checksum and finally the checksum of the
    index itself.

    :param filename: The filename of the new pack index file.
    :param entries: List of tuples with object name (sha), offset_in_pack, and
        crc32_checksum.
    :param pack_checksum: Checksum of the pack file.
    :raise AssertionError: If ``pack_checksum`` is not 20 bytes long.
    """
    # Validate before creating the file so a bad checksum cannot leave a
    # truncated index behind.
    assert len(pack_checksum) == 20
    # Sort entries first
    entries = sorted(entries)
    # Count how many names start with each possible first byte.
    counts = [0] * 0x100
    for (name, offset, entry_checksum) in entries:
        counts[ord(name[0])] += 1
    # The index is binary data, so open the file in binary mode.
    f = SHA1Writer(open(filename, 'wb'))
    # Fan-out table: slot i holds the number of names whose first byte is
    # less than or equal to i.
    total = 0
    for count in counts:
        total += count
        f.write(struct.pack(">L", total))
    for (name, offset, entry_checksum) in entries:
        f.write(struct.pack(">L20s", offset, name))
    f.write(pack_checksum)
    f.close()

417

418

419

def write_pack_index_v2(filename, entries, pack_checksum):
    """Write a new version 2 pack index file.

    The file consists of the magic header and version, the fan-out table,
    the table of names, the table of CRC32 checksums, the table of 4-byte
    offsets, the table of 8-byte offsets for entries that do not fit in 31
    bits, the pack checksum and finally the checksum of the index itself.

    :param filename: The filename of the new pack index file.
    :param entries: List of tuples with object name (sha), offset_in_pack, and
        crc32_checksum.
    :param pack_checksum: Checksum of the pack file.
    :raise AssertionError: If ``pack_checksum`` is not 20 bytes long.
    """
    # Validate before creating the file so a bad checksum cannot leave a
    # truncated index behind.
    assert len(pack_checksum) == 20
    # Sort entries first
    entries = sorted(entries)
    # Count how many names start with each possible first byte.
    counts = [0] * 0x100
    for (name, offset, entry_checksum) in entries:
        counts[ord(name[0])] += 1
    # The index is binary data, so open the file in binary mode.
    f = SHA1Writer(open(filename, 'wb'))
    f.write('\377tOc')
    f.write(struct.pack(">L", 2))
    # Fan-out table: slot i holds the number of names whose first byte is
    # less than or equal to i.
    total = 0
    for count in counts:
        total += count
        f.write(struct.pack(">L", total))
    for (name, offset, entry_checksum) in entries:
        f.write(name)
    for (name, offset, entry_checksum) in entries:
        f.write(struct.pack(">L", entry_checksum))
    large_offsets = []
    for (name, offset, entry_checksum) in entries:
        if offset < 0x80000000:
            f.write(struct.pack(">L", offset))
        else:
            # Offsets that need the top bit go in the 8-byte table; the
            # 4-byte slot holds the top bit plus the position in that table.
            f.write(struct.pack(">L", 0x80000000 + len(large_offsets)))
            large_offsets.append(offset)
    for offset in large_offsets:
        f.write(struct.pack(">Q", offset))
    f.write(pack_checksum)
    f.close()

451

Older »