/brz/remove-bazaar : revision 0.211.20

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to dulwich/pack.py

Committer: Jelmer Vernooij
Date: 2008-12-11 07:30:52 UTC
mto: (0.215.1 trunk)
mto: This revision was merged to the branch mainline in revision 6960.
Revision ID: jelmer@samba.org-20081211073052-lq0ypg5h3vvyzp3j

Change project name to dulwich everywhere, add assertion.

files added:

.bzrignore

COPYING

Makefile

README

dulwich

dulwich/__init__.py

dulwich/commit.py

dulwich/errors.py

dulwich/objects.py

dulwich/pack.py

dulwich/repo.py

dulwich/tests

dulwich/tests/__init__.py

dulwich/tests/data

dulwich/tests/data/blobs

dulwich/tests/data/blobs/6f670c0fb53f9463760b7295fbb814e965fb20c8

dulwich/tests/data/blobs/954a536f7819d40e6f637f849ee187dd10066349

dulwich/tests/data/blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391

dulwich/tests/data/commits

dulwich/tests/data/commits/0d89f20333fbb1d2f3a94da77f4981373d8f4310

dulwich/tests/data/commits/5dac377bdded4c9aeb8dff595f0faeebcc8498cc

dulwich/tests/data/commits/60dacdc733de308bb77bb76ce0fb0f9b44c9769e

dulwich/tests/data/packs

dulwich/tests/data/packs/pack-bc63ddad95e7321ee734ea11a7a62d314e0d7481.idx

dulwich/tests/data/packs/pack-bc63ddad95e7321ee734ea11a7a62d314e0d7481.pack

dulwich/tests/data/repos

dulwich/tests/data/repos/a

dulwich/tests/data/repos/a/.git

dulwich/tests/data/repos/a/.git/HEAD

dulwich/tests/data/repos/a/.git/index

dulwich/tests/data/repos/a/.git/objects

dulwich/tests/data/repos/a/.git/objects/2a

dulwich/tests/data/repos/a/.git/objects/2a/72d929692c41d8554c07f6301757ba18a65d91

dulwich/tests/data/repos/a/.git/objects/4e

dulwich/tests/data/repos/a/.git/objects/4e/f30bbfe26431a69c3820d3a683df54d688f2ec

dulwich/tests/data/repos/a/.git/objects/4f

dulwich/tests/data/repos/a/.git/objects/4f/2e6529203aa6d44b5af6e3292c837ceda003f9

dulwich/tests/data/repos/a/.git/objects/7d

dulwich/tests/data/repos/a/.git/objects/7d/9a07d797595ef11344549b8d08198e48c15364

dulwich/tests/data/repos/a/.git/objects/a2

dulwich/tests/data/repos/a/.git/objects/a2/96d0bb611188cabb256919f36bc30117cca005

dulwich/tests/data/repos/a/.git/objects/a9

dulwich/tests/data/repos/a/.git/objects/a9/0fa2d900a17e99b433217e988c4eb4a2e9a097

dulwich/tests/data/repos/a/.git/objects/ff

dulwich/tests/data/repos/a/.git/objects/ff/d47d45845a8f6576491e1edb97e3fe6a850e7f

dulwich/tests/data/repos/a/.git/objects/info

dulwich/tests/data/repos/a/.git/objects/pack

dulwich/tests/data/repos/a/.git/refs

dulwich/tests/data/repos/a/.git/refs/heads

dulwich/tests/data/repos/a/.git/refs/heads/master

dulwich/tests/data/repos/a/.git/refs/tags

dulwich/tests/data/repos/a/a

dulwich/tests/data/repos/a/b

dulwich/tests/data/repos/a/c

dulwich/tests/data/repos/ooo_merge

dulwich/tests/data/repos/ooo_merge/.git

dulwich/tests/data/repos/ooo_merge/.git/HEAD

dulwich/tests/data/repos/ooo_merge/.git/index

dulwich/tests/data/repos/ooo_merge/.git/objects

dulwich/tests/data/repos/ooo_merge/.git/objects/29

dulwich/tests/data/repos/ooo_merge/.git/objects/29/69be3e8ee1c0222396a5611407e4769f14e54b

dulwich/tests/data/repos/ooo_merge/.git/objects/38

dulwich/tests/data/repos/ooo_merge/.git/objects/38/74e9c60a6d149c44c928140f250d81e6381520

dulwich/tests/data/repos/ooo_merge/.git/objects/6f

dulwich/tests/data/repos/ooo_merge/.git/objects/6f/670c0fb53f9463760b7295fbb814e965fb20c8

dulwich/tests/data/repos/ooo_merge/.git/objects/70

dulwich/tests/data/repos/ooo_merge/.git/objects/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6

dulwich/tests/data/repos/ooo_merge/.git/objects/76

dulwich/tests/data/repos/ooo_merge/.git/objects/76/01d7f6231db6a57f7bbb79ee52e4d462fd44d1

dulwich/tests/data/repos/ooo_merge/.git/objects/90

dulwich/tests/data/repos/ooo_merge/.git/objects/90/182552c4a85a45ec2a835cadc3451bebdfe870

dulwich/tests/data/repos/ooo_merge/.git/objects/95

dulwich/tests/data/repos/ooo_merge/.git/objects/95/4a536f7819d40e6f637f849ee187dd10066349

dulwich/tests/data/repos/ooo_merge/.git/objects/b2

dulwich/tests/data/repos/ooo_merge/.git/objects/b2/a2766a2879c209ab1176e7e778b81ae422eeaa

dulwich/tests/data/repos/ooo_merge/.git/objects/f5

dulwich/tests/data/repos/ooo_merge/.git/objects/f5/07291b64138b875c28e03469025b1ea20bc614

dulwich/tests/data/repos/ooo_merge/.git/objects/f9

dulwich/tests/data/repos/ooo_merge/.git/objects/f9/e39b120c68182a4ba35349f832d0e4e61f485c

dulwich/tests/data/repos/ooo_merge/.git/objects/fb

dulwich/tests/data/repos/ooo_merge/.git/objects/fb/5b0425c7ce46959bec94d54b9a157645e114f5

dulwich/tests/data/repos/ooo_merge/.git/objects/info

dulwich/tests/data/repos/ooo_merge/.git/objects/pack

dulwich/tests/data/repos/ooo_merge/.git/refs

dulwich/tests/data/repos/ooo_merge/.git/refs/heads

dulwich/tests/data/repos/ooo_merge/.git/refs/heads/master

dulwich/tests/data/repos/ooo_merge/.git/refs/tags

dulwich/tests/data/repos/ooo_merge/a

dulwich/tests/data/repos/ooo_merge/b

dulwich/tests/data/repos/ooo_merge/c

dulwich/tests/data/repos/simple_merge

dulwich/tests/data/repos/simple_merge/.git

dulwich/tests/data/repos/simple_merge/.git/HEAD

dulwich/tests/data/repos/simple_merge/.git/index

dulwich/tests/data/repos/simple_merge/.git/objects

dulwich/tests/data/repos/simple_merge/.git/objects/0d

dulwich/tests/data/repos/simple_merge/.git/objects/0d/89f20333fbb1d2f3a94da77f4981373d8f4310

dulwich/tests/data/repos/simple_merge/.git/objects/1b

dulwich/tests/data/repos/simple_merge/.git/objects/1b/6318f651a534b38f9c7aedeebbd56c1e896853

dulwich/tests/data/repos/simple_merge/.git/objects/29

dulwich/tests/data/repos/simple_merge/.git/objects/29/69be3e8ee1c0222396a5611407e4769f14e54b

dulwich/tests/data/repos/simple_merge/.git/objects/4c

dulwich/tests/data/repos/simple_merge/.git/objects/4c/ffe90e0a41ad3f5190079d7c8f036bde29cbe6

dulwich/tests/data/repos/simple_merge/.git/objects/5d

dulwich/tests/data/repos/simple_merge/.git/objects/5d/ac377bdded4c9aeb8dff595f0faeebcc8498cc

dulwich/tests/data/repos/simple_merge/.git/objects/60

dulwich/tests/data/repos/simple_merge/.git/objects/60/dacdc733de308bb77bb76ce0fb0f9b44c9769e

dulwich/tests/data/repos/simple_merge/.git/objects/6f

dulwich/tests/data/repos/simple_merge/.git/objects/6f/670c0fb53f9463760b7295fbb814e965fb20c8

dulwich/tests/data/repos/simple_merge/.git/objects/70

dulwich/tests/data/repos/simple_merge/.git/objects/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6

dulwich/tests/data/repos/simple_merge/.git/objects/90

dulwich/tests/data/repos/simple_merge/.git/objects/90/182552c4a85a45ec2a835cadc3451bebdfe870

dulwich/tests/data/repos/simple_merge/.git/objects/95

dulwich/tests/data/repos/simple_merge/.git/objects/95/4a536f7819d40e6f637f849ee187dd10066349

dulwich/tests/data/repos/simple_merge/.git/objects/ab

dulwich/tests/data/repos/simple_merge/.git/objects/ab/64bbdcc51b170d21588e5c5d391ee5c0c96dfd

dulwich/tests/data/repos/simple_merge/.git/objects/d4

dulwich/tests/data/repos/simple_merge/.git/objects/d4/bdad6549dfedf25d3b89d21f506aff575b28a7

dulwich/tests/data/repos/simple_merge/.git/objects/d8

dulwich/tests/data/repos/simple_merge/.git/objects/d8/0c186a03f423a81b39df39dc87fd269736ca86

dulwich/tests/data/repos/simple_merge/.git/objects/e6

dulwich/tests/data/repos/simple_merge/.git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391

dulwich/tests/data/repos/simple_merge/.git/objects/info

dulwich/tests/data/repos/simple_merge/.git/objects/pack

dulwich/tests/data/repos/simple_merge/.git/refs

dulwich/tests/data/repos/simple_merge/.git/refs/heads

dulwich/tests/data/repos/simple_merge/.git/refs/heads/master

dulwich/tests/data/repos/simple_merge/.git/refs/tags

dulwich/tests/data/repos/simple_merge/a

dulwich/tests/data/repos/simple_merge/b

dulwich/tests/data/repos/simple_merge/d

dulwich/tests/data/repos/simple_merge/e

dulwich/tests/data/trees

dulwich/tests/data/trees/70c190eb48fa8bbb50ddc692a17b44cb781af7f6

dulwich/tests/test_objects.py

dulwich/tests/test_pack.py

dulwich/tests/test_repository.py

setup.py

files removed:

.bzrignore

COPYING

INSTALL

Makefile

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

commands.py

converter.py

dir.py

errors.py

fetch.py

foreign

foreign/.bzrignore

foreign/TODO

foreign/__init__.py

foreign/test_versionedfiles.py

foreign/upgrade.py

foreign/versionedfiles.py

mapping.py

notes

notes/roundtripping.txt

remote.py

repository.py

revspec.py

server.py

setup.py

shamap.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_dir.py

tests/test_fetch.py

tests/test_ids.py

tests/test_repository.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

dulwich/pack.py

# pack.py -- For dealing with packed git objects.

# The code is loosely based on that in the sha1_file.c file from git itself,

# which is Copyright (C) Linus Torvalds, 2005 and distributed under the

# GPL version 2.

# This program is free software; you can redistribute it and/or

# modify it under the terms of the GNU General Public License

# as published by the Free Software Foundation; version 2

# of the License.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,

# MA 02110-1301, USA.

"""Classes for dealing with packed git objects.

A pack is a compact representation of a bunch of objects, stored

using deltas where possible.

They have two parts, the pack file, which stores the data, and an index

that tells you where the data is.

To find an object you look in all of the index files 'til you find a

match for the object name. You then use the pointer got from this as

a pointer in to the corresponding packfile.

"""

import mmap

import os

import sys

# True on Python 2.6 and later (including 3.x); simple_mmap() uses this to
# decide whether it may pass the ``offset`` keyword to mmap.mmap().
supports_mmap_offset = sys.version_info[:2] >= (2, 6)

from objects import (ShaFile,

_decompress,

)

def hex_to_sha(hex):
    """Return the integer value of the hexadecimal string ``hex``."""
    return int(hex, 16)


# 256 MiB: the limit simple_mmap() enforces when mmap() cannot take an offset.
MAX_MMAP_SIZE = 256 * 1024 * 1024

class _ArraySkipper(object):
    """Read-only view of a buffer that hides its first ``offset`` items.

    Item ``i`` of the view is item ``i + offset`` of the wrapped buffer.
    """

    def __init__(self, array, offset):
        self.array = array
        self.offset = offset

    def _shift(self, index):
        # Negative indices count back from the end, which the view shares
        # with the wrapped buffer, so only non-negative ones are moved.
        if index < 0:
            return index
        return index + self.offset

    def __len__(self):
        return len(self.array) - self.offset

    def __getslice__(self, i, j):
        # Python 2 routes simple ``view[i:j]`` slices here.
        return self.array[self._shift(i):self._shift(j)]

    def __getitem__(self, i):
        if isinstance(i, slice):
            # Python 3 (and extended slices on Python 2) arrive as slice
            # objects; only plain contiguous slices are supported.
            if i.step not in (None, 1):
                raise TypeError("only contiguous slices are supported")
            if i.start is None:
                start = self.offset
            else:
                start = self._shift(i.start)
            if i.stop is None:
                stop = None
            else:
                stop = self._shift(i.stop)
            return self.array[start:stop]
        return self.array[self._shift(i)]


def simple_mmap(f, offset, size, access=mmap.ACCESS_READ):
    """Map a file and return a buffer whose item 0 is byte ``offset``.

    :param f: Open file object. ``f.fileno()`` is mapped; ``f.name`` (when
        present) is only used in the error message.
    :param offset: Position in the file that index 0 of the result refers
        to. It does not need to be aligned.
    :param size: Length handed to ``mmap.mmap()``. When mmap() supports
        offsets this many bytes are addressable starting at ``offset``
        (``0`` means "to the end of the file"). Without offset support the
        first ``size`` bytes of the file are mapped and the leading
        ``offset`` bytes are hidden.
        NOTE(review): the two modes disagree on whether ``size`` counts
        from ``offset`` or from the start of the file -- confirm intent.
    :param access: mmap access mode, ``mmap.ACCESS_READ`` by default.
    :return: The ``mmap`` object itself when nothing has to be skipped,
        otherwise a wrapper supporting indexing, contiguous slicing and
        ``len()``.
    :raises AssertionError: if mmap() has no offset support and
        ``offset + size`` exceeds ``MAX_MMAP_SIZE``.
    """
    if offset+size > MAX_MMAP_SIZE and not supports_mmap_offset:
        raise AssertionError("%s is larger than 256 meg, and this version "
            "of Python does not support the offset argument to mmap()." %
            (getattr(f, "name", f),))
    if supports_mmap_offset:
        # mmap() only accepts offsets that are a multiple of the allocation
        # granularity: start at the preceding boundary and hide the extra
        # leading bytes behind the skipper.
        skip = offset % mmap.ALLOCATIONGRANULARITY
        if size:
            length = size + skip
        else:
            # A length of 0 asks mmap() to map through to the end of file.
            length = 0
        mem = mmap.mmap(f.fileno(), length, access=access,
                        offset=offset - skip)
    else:
        skip = offset
        mem = mmap.mmap(f.fileno(), size, access=access)
    if skip == 0:
        return mem
    return _ArraySkipper(mem, skip)

def multi_ord(map, start, count):
    """Decode ``count`` items of ``map`` from ``start`` as a big-endian integer.

    :param map: Indexable byte source. Items may be one-character strings
        (as produced by indexing a Python 2 ``str``) or integers (as produced
        by indexing ``bytes`` on Python 3 or a ``bytearray``).
    :param start: Index of the most significant byte.
    :param count: Number of items to read; ``0`` yields ``0``.
    :return: The unsigned integer formed by the items, most significant first.
    """
    value = 0
    for i in range(count):
        byte = map[start+i]
        if not isinstance(byte, int):
            # Indexing produced a one-character string rather than a number.
            byte = ord(byte)
        value = value * 0x100 + byte
    return value

class PackIndex(object):
    """An index in to a packfile.

    Given a sha id of an object a pack index can tell you the location in the
    packfile of that object if it has it.

    To do the lookup it opens the file, and indexes the first 256 4 byte
    groups with the first byte of the sha id. The value in the four byte group
    indexed is the end of the group that shares the same starting byte.
    Subtract one from the starting byte and index again to find the start of
    the group (the group for starting byte 0 starts at record 0).
    The values are sorted by sha id within the group, so do the math to find
    the start and end offset and then bisect in to find if the value is
    present.
    """

    # Size in bytes of one entry of the 256-entry header table.
    header_record_size = 4
    # Total size in bytes of the header table; records start right after it.
    header_size = 256 * header_record_size
    # Size in bytes of the packfile offset stored at the start of a record.
    index_size = 4
    # Size in bytes of the sha stored after the offset in a record.
    sha_bytes = 20
    # Size in bytes of one complete record (offset followed by sha).
    record_size = sha_bytes + index_size

    def __init__(self, filename):
        """Create a pack index object.

        Provide it with the name of the index file to consider, and it will
        map it whenever required.

        :param filename: Path of the index file; it must exist and be larger
            than the header table.
        """
        self._filename = filename
        assert os.path.exists(filename), "%s is not a pack index" % filename
        # Take the size now, so it can be checked each time we map the file to
        # ensure that it hasn't changed.
        self._size = os.path.getsize(filename)
        assert self._size > self.header_size, "%s is too small to be a packfile" % \
            filename

    def object_index(self, sha):
        """Return the index in to the corresponding packfile for the object.

        Given the name of an object it will return the offset that object
        lives at within the corresponding pack file. If the pack file doesn't
        have the object then None will be returned.

        :param sha: Hexadecimal sha of the object to look up.
        :return: The offset stored for the object, or None if it is absent.
        """
        size = os.path.getsize(self._filename)
        assert size == self._size, "Pack index %s has changed size, I don't " \
            "like that" % self._filename
        f = open(self._filename, 'rb')
        try:
            map = simple_mmap(f, 0, size)
            try:
                return self._object_index(map, sha)
            finally:
                # The result is a plain number, so the mapping can be
                # released before the file is closed.
                map.close()
        finally:
            f.close()

    def _object_index(self, map, hexsha):
        """See object_index.

        :param map: Indexable view of the whole index file.
        :param hexsha: Hexadecimal sha of the object to look up.
        :return: The offset stored for the object, or None if it is absent.
        """
        first_byte = hex_to_sha(hexsha[:2])
        header_offset = self.header_record_size * first_byte
        if first_byte == 0:
            # There is no preceding header entry: the first group starts at
            # record 0. Reading "entry -1" would wrap to the end of the map.
            start = 0
        else:
            start = multi_ord(map, header_offset-self.header_record_size,
                              self.header_record_size)
        end = multi_ord(map, header_offset, self.header_record_size)
        sha = hex_to_sha(hexsha)
        # Bisect over the half-open range of record numbers [start, end).
        while start < end:
            i = (start + end) // 2
            offset = self.header_size + (i * self.record_size)
            file_sha = multi_ord(map, offset + self.index_size, self.sha_bytes)
            if file_sha == sha:
                return multi_ord(map, offset, self.index_size)
            elif file_sha < sha:
                # Bounds are record numbers, not byte offsets in the file.
                start = i + 1
            else:
                end = i
        return None

151

152

153

class PackData(object):
    """The data contained in a packfile.

    Pack files can be accessed both sequentially for exploding a pack, and
    directly with the help of an index to retrieve a specific object.

    The objects within are either complete or a delta against another.

    The header is variable length. If the MSB of each byte is set then it
    indicates that the subsequent byte is still part of the header.
    For the first byte the next MS bits are the type, which tells you the type
    of object, and whether it is a delta. The LS 4 bits are the lowest bits of
    the size. For each subsequent byte the LS 7 bits are the next MS bits of
    the size, i.e. the last byte of the header contains the MS bits of the
    size.

    For the complete objects the data is stored as zlib deflated data.
    The size in the header is the uncompressed object size, so to uncompress
    you need to just keep feeding data to zlib until you get an object back,
    or it errors on bad data. This is done here by just giving the complete
    buffer from the start of the deflated object on. This is bad, but until I
    get mmap sorted out it will have to do.

    Currently there are no integrity checks done. Also no attempt is made to
    try and detect the delta case, or a request for an object at the wrong
    position. It will all just throw a zlib or KeyError.
    """

    def __init__(self, filename):
        """Create a PackData object that represents the pack in the given filename.

        The file must exist and stay readable until the object is disposed of.
        It must also stay the same size. It will be mapped whenever needed.

        Currently there is a restriction on the size of the pack as the python
        mmap implementation is flawed.

        :param filename: Path of the pack file.
        """
        self._filename = filename
        assert os.path.exists(filename), "%s is not a packfile" % filename
        self._size = os.path.getsize(filename)

    def get_object_at(self, offset):
        """Given an offset in to the packfile return the object that is there.

        Using the associated index the location of an object can be looked up,
        and then the packfile can be asked directly for that object using this
        function.

        Currently only non-delta objects are supported.

        :param offset: Integer position in the pack file of the first byte of
            the object's header.
        :return: The object built by ``ShaFile.from_raw_string``.
        """
        assert isinstance(offset, (int, long))
        size = os.path.getsize(self._filename)
        assert size == self._size, "Pack data %s has changed size, I don't " \
            "like that" % self._filename
        f = open(self._filename, 'rb')
        try:
            # Map the file from its start and index at ``offset`` ourselves:
            # ``size`` is the size of the whole file, so asking for ``size``
            # bytes starting at ``offset`` would run past the end of the
            # file, and ``offset`` is not aligned for mmap() either.
            map = simple_mmap(f, 0, size)
            try:
                return self._get_object_at(map, offset)
            finally:
                # The compressed data is copied out of the mapping before the
                # object is built, so the mapping can be released here.
                map.close()
        finally:
            f.close()

    def _get_object_at(self, map, offset=0):
        """Parse the object whose header starts at ``map[offset]``.

        :param map: Indexable, sliceable view of the pack data.
        :param offset: Index in ``map`` of the first header byte.
        :return: The object built by ``ShaFile.from_raw_string``.
        """
        first_byte = ord(map[offset])
        sign_extend = first_byte & 0x80
        obj_type = (first_byte >> 4) & 0x07
        # The inflated size is decoded along with the header but is not
        # checked against the decompressed data.
        size = first_byte & 0x0f
        cur_offset = 0
        while sign_extend > 0:
            byte = ord(map[offset+cur_offset+1])
            sign_extend = byte & 0x80
            size_part = byte & 0x7f
            size += size_part << ((cur_offset * 7) + 4)
            cur_offset += 1
        raw_base = offset+cur_offset+1
        # The size is the inflated size, so we have no idea what the deflated
        # size is, so for now give it as much as we have. It should really
        # iterate feeding it more data if it doesn't decompress, but as we
        # have the whole thing then just use it.
        raw = map[raw_base:]
        uncomp = _decompress(raw)
        obj = ShaFile.from_raw_string(obj_type, uncomp)
        return obj

234

Older »