bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
|
0.211.9
by James Westby
Add some basic pack handling code. |
1 |
# pack.py -- For dealing wih packed git objects.
|
2 |
# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
|
|
|
0.211.20
by Jelmer Vernooij
Change project name to dulwich everywhere, add assertion. |
3 |
# Copryight (C) 2008 Jelmer Vernooij <jelmer@samba.org>
|
|
0.211.9
by James Westby
Add some basic pack handling code. |
4 |
# The code is loosely based on that in the sha1_file.c file from git itself,
|
5 |
# which is Copyright (C) Linus Torvalds, 2005 and distributed under the
|
|
6 |
# GPL version 2.
|
|
7 |
#
|
|
8 |
# This program is free software; you can redistribute it and/or
|
|
9 |
# modify it under the terms of the GNU General Public License
|
|
10 |
# as published by the Free Software Foundation; version 2
|
|
11 |
# of the License.
|
|
12 |
#
|
|
13 |
# This program is distributed in the hope that it will be useful,
|
|
14 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
15 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
16 |
# GNU General Public License for more details.
|
|
17 |
#
|
|
18 |
# You should have received a copy of the GNU General Public License
|
|
19 |
# along with this program; if not, write to the Free Software
|
|
20 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
21 |
# MA 02110-1301, USA.
|
|
22 |
||
23 |
"""Classes for dealing with packed git objects.
|
|
24 |
||
25 |
A pack is a compact representation of a bunch of objects, stored
|
|
26 |
using deltas where possible.
|
|
27 |
||
28 |
They have two parts, the pack file, which stores the data, and an index
|
|
29 |
that tells you where the data is.
|
|
30 |
||
31 |
To find an object you look in all of the index files 'til you find a
|
|
32 |
match for the object name. You then use the pointer got from this as
|
|
33 |
a pointer in to the corresponding packfile.
|
|
34 |
"""
|
|
35 |
||
|
0.211.26
by Jelmer Vernooij
Write fan out table correctly. |
36 |
from collections import defaultdict |
|
0.211.23
by Jelmer Vernooij
Add functions for checking checksums of index files. |
37 |
import hashlib |
|
0.211.9
by James Westby
Add some basic pack handling code. |
38 |
import mmap |
39 |
import os |
|
|
0.211.21
by Jelmer Vernooij
Read pack header. |
40 |
import struct |
|
0.211.17
by Jelmer Vernooij
Add simple mmap wrapper. |
41 |
import sys |
42 |
||
43 |
supports_mmap_offset = (sys.version_info[0] >= 3 or |
|
44 |
(sys.version_info[0] == 2 and sys.version_info[1] >= 6)) |
|
|
0.211.9
by James Westby
Add some basic pack handling code. |
45 |
|
46 |
from objects import (ShaFile, |
|
47 |
_decompress, |
|
48 |
)
|
|
49 |
||
|
0.211.24
by Jelmer Vernooij
Add iterentries(). |
50 |
def hex_to_sha(hex): |
51 |
ret = "" |
|
52 |
for i in range(0, len(hex), 2): |
|
53 |
ret += chr(int(hex[i:i+2], 16)) |
|
54 |
return ret |
|
|
0.211.9
by James Westby
Add some basic pack handling code. |
55 |
|
|
0.211.35
by Jelmer Vernooij
Add simple pack dump utility. |
56 |
def sha_to_hex(sha): |
57 |
ret = "" |
|
58 |
for i in sha: |
|
59 |
ret += "%02x" % ord(i) |
|
60 |
return ret |
|
61 |
||
|
0.211.20
by Jelmer Vernooij
Change project name to dulwich everywhere, add assertion. |
62 |
MAX_MMAP_SIZE = 256 * 1024 * 1024 |
63 |
||
|
0.211.17
by Jelmer Vernooij
Add simple mmap wrapper. |
64 |
def simple_mmap(f, offset, size, access=mmap.ACCESS_READ): |
65 |
if offset+size > MAX_MMAP_SIZE and not supports_mmap_offset: |
|
66 |
raise AssertionError("%s is larger than 256 meg, and this version " |
|
67 |
"of Python does not support the offset argument to mmap().") |
|
68 |
if supports_mmap_offset: |
|
|
0.211.19
by Jelmer Vernooij
Provide replacement for mmap()'s offset argument. |
69 |
return mmap.mmap(f.fileno(), size, access=access, offset=offset) |
|
0.211.17
by Jelmer Vernooij
Add simple mmap wrapper. |
70 |
else: |
|
0.211.19
by Jelmer Vernooij
Provide replacement for mmap()'s offset argument. |
71 |
class ArraySkipper(object): |
72 |
||
73 |
def __init__(self, array, offset): |
|
74 |
self.array = array |
|
75 |
self.offset = offset |
|
76 |
||
77 |
def __getslice__(self, i, j): |
|
78 |
return self.array[i+self.offset:j+self.offset] |
|
79 |
||
80 |
def __getitem__(self, i): |
|
81 |
return self.array[i+self.offset] |
|
82 |
||
|
0.211.22
by Jelmer Vernooij
Read fan-out table always, read file sha1. |
83 |
def __len__(self): |
84 |
return len(self.array) - self.offset |
|
85 |
||
|
0.211.23
by Jelmer Vernooij
Add functions for checking checksums of index files. |
86 |
def __str__(self): |
87 |
return str(self.array[self.offset:]) |
|
88 |
||
|
0.211.22
by Jelmer Vernooij
Read fan-out table always, read file sha1. |
89 |
mem = mmap.mmap(f.fileno(), size+offset, access=access) |
|
0.211.19
by Jelmer Vernooij
Provide replacement for mmap()'s offset argument. |
90 |
if offset == 0: |
91 |
return mem |
|
92 |
return ArraySkipper(mem, offset) |
|
|
0.211.17
by Jelmer Vernooij
Add simple mmap wrapper. |
93 |
|
94 |
||
|
0.211.9
by James Westby
Add some basic pack handling code. |
95 |
def multi_ord(map, start, count): |
|
0.211.22
by Jelmer Vernooij
Read fan-out table always, read file sha1. |
96 |
value = 0 |
97 |
for i in range(count): |
|
98 |
value = value * 0x100 + ord(map[start+i]) |
|
99 |
return value |
|
100 |
||
|
0.211.9
by James Westby
Add some basic pack handling code. |
101 |
|
102 |
class PackIndex(object): |
|
103 |
"""An index in to a packfile. |
|
104 |
||
105 |
Given a sha id of an object a pack index can tell you the location in the
|
|
106 |
packfile of that object if it has it.
|
|
107 |
||
|
0.211.20
by Jelmer Vernooij
Change project name to dulwich everywhere, add assertion. |
108 |
To do the loop it opens the file, and indexes first 256 4 byte groups
|
|
0.211.9
by James Westby
Add some basic pack handling code. |
109 |
with the first byte of the sha id. The value in the four byte group indexed
|
110 |
is the end of the group that shares the same starting byte. Subtract one
|
|
111 |
from the starting byte and index again to find the start of the group.
|
|
112 |
The values are sorted by sha id within the group, so do the math to find
|
|
113 |
the start and end offset and then bisect in to find if the value is present.
|
|
114 |
"""
|
|
115 |
||
|
0.211.22
by Jelmer Vernooij
Read fan-out table always, read file sha1. |
116 |
PACK_INDEX_HEADER_SIZE = 0x100 * 4 |
|
0.211.9
by James Westby
Add some basic pack handling code. |
117 |
sha_bytes = 20 |
|
0.211.24
by Jelmer Vernooij
Add iterentries(). |
118 |
record_size = sha_bytes + 4 |
|
0.211.9
by James Westby
Add some basic pack handling code. |
119 |
|
120 |
def __init__(self, filename): |
|
121 |
"""Create a pack index object. |
|
122 |
||
123 |
Provide it with the name of the index file to consider, and it will map
|
|
124 |
it whenever required.
|
|
125 |
"""
|
|
126 |
self._filename = filename |
|
127 |
assert os.path.exists(filename), "%s is not a pack index" % filename |
|
128 |
# Take the size now, so it can be checked each time we map the file to
|
|
129 |
# ensure that it hasn't changed.
|
|
130 |
self._size = os.path.getsize(filename) |
|
|
0.211.24
by Jelmer Vernooij
Add iterentries(). |
131 |
self._file = open(filename, 'r') |
132 |
self._contents = simple_mmap(self._file, 0, self._size) |
|
|
0.211.32
by Jelmer Vernooij
Fix v2 index files. |
133 |
if self._contents[:4] != '\377tOc': |
134 |
self.version = 1 |
|
|
0.211.30
by Jelmer Vernooij
Allow returning CRC32 checksums, as done by pack index v2. |
135 |
self._fan_out_table = self._read_fan_out_table(0) |
136 |
else: |
|
|
0.211.32
by Jelmer Vernooij
Fix v2 index files. |
137 |
(self.version, ) = struct.unpack_from(">L", self._contents, 4) |
138 |
assert self.version in (2,), "Version was %d" % self.version |
|
|
0.211.30
by Jelmer Vernooij
Allow returning CRC32 checksums, as done by pack index v2. |
139 |
self._fan_out_table = self._read_fan_out_table(8) |
|
0.211.31
by Jelmer Vernooij
More work on v2 packs. |
140 |
self._name_table_offset = 8 + 0x100 * 4 |
141 |
self._crc32_table_offset = self._name_table_offset + 20 * len(self) |
|
142 |
self._pack_offset_table_offset = self._crc32_table_offset + 4 * len(self) |
|
|
0.211.22
by Jelmer Vernooij
Read fan-out table always, read file sha1. |
143 |
|
|
0.211.24
by Jelmer Vernooij
Add iterentries(). |
144 |
def close(self): |
145 |
self._file.close() |
|
146 |
||
|
0.211.22
by Jelmer Vernooij
Read fan-out table always, read file sha1. |
147 |
def __len__(self): |
|
0.211.24
by Jelmer Vernooij
Add iterentries(). |
148 |
"""Return the number of entries in this pack index.""" |
|
0.211.31
by Jelmer Vernooij
More work on v2 packs. |
149 |
return self._fan_out_table[-1] |
|
0.211.22
by Jelmer Vernooij
Read fan-out table always, read file sha1. |
150 |
|
|
0.211.24
by Jelmer Vernooij
Add iterentries(). |
151 |
def _unpack_entry(self, i): |
152 |
"""Unpack the i-th entry in the index file. |
|
153 |
||
|
0.211.30
by Jelmer Vernooij
Allow returning CRC32 checksums, as done by pack index v2. |
154 |
:return: Tuple with object name (SHA), offset in pack file and
|
155 |
CRC32 checksum (if known)."""
|
|
|
0.211.32
by Jelmer Vernooij
Fix v2 index files. |
156 |
if self.version == 1: |
|
0.211.31
by Jelmer Vernooij
More work on v2 packs. |
157 |
(offset, name) = struct.unpack_from(">L20s", self._contents, |
158 |
self.PACK_INDEX_HEADER_SIZE + (i * self.record_size)) |
|
159 |
return (name, offset, None) |
|
160 |
else: |
|
161 |
return (self._unpack_name(i), self._unpack_offset(i), |
|
162 |
self._unpack_crc32_checksum(i)) |
|
163 |
||
164 |
def _unpack_name(self, i): |
|
|
0.211.32
by Jelmer Vernooij
Fix v2 index files. |
165 |
if self.version == 1: |
|
0.211.31
by Jelmer Vernooij
More work on v2 packs. |
166 |
return self._unpack_entry(i)[0] |
167 |
else: |
|
168 |
return struct.unpack_from("20s", self._contents, |
|
|
0.211.32
by Jelmer Vernooij
Fix v2 index files. |
169 |
self._name_table_offset + i * 20)[0] |
|
0.211.31
by Jelmer Vernooij
More work on v2 packs. |
170 |
|
171 |
def _unpack_offset(self, i): |
|
|
0.211.32
by Jelmer Vernooij
Fix v2 index files. |
172 |
if self.version == 1: |
|
0.211.31
by Jelmer Vernooij
More work on v2 packs. |
173 |
return self._unpack_entry(i)[1] |
174 |
else: |
|
175 |
return struct.unpack_from(">L", self._contents, |
|
|
0.211.32
by Jelmer Vernooij
Fix v2 index files. |
176 |
self._pack_offset_table_offset + i * 4)[0] |
|
0.211.31
by Jelmer Vernooij
More work on v2 packs. |
177 |
|
178 |
def _unpack_crc32_checksum(self, i): |
|
|
0.211.32
by Jelmer Vernooij
Fix v2 index files. |
179 |
if self.version == 1: |
|
0.211.31
by Jelmer Vernooij
More work on v2 packs. |
180 |
return None |
181 |
else: |
|
182 |
return struct.unpack_from(">L", self._contents, |
|
|
0.211.32
by Jelmer Vernooij
Fix v2 index files. |
183 |
self._crc32_table_offset + i * 4)[0] |
|
0.211.24
by Jelmer Vernooij
Add iterentries(). |
184 |
|
185 |
def iterentries(self): |
|
186 |
"""Iterate over the entries in this pack index. |
|
187 |
|
|
|
0.211.30
by Jelmer Vernooij
Allow returning CRC32 checksums, as done by pack index v2. |
188 |
Will yield tuples with object name, offset in packfile and crc32 checksum.
|
|
0.211.24
by Jelmer Vernooij
Add iterentries(). |
189 |
"""
|
190 |
for i in range(len(self)): |
|
191 |
yield self._unpack_entry(i) |
|
192 |
||
|
0.211.30
by Jelmer Vernooij
Allow returning CRC32 checksums, as done by pack index v2. |
193 |
def _read_fan_out_table(self, start_offset): |
|
0.211.33
by Jelmer Vernooij
Use common object for writing sha1 files. |
194 |
ret = [] |
|
0.211.22
by Jelmer Vernooij
Read fan-out table always, read file sha1. |
195 |
for i in range(0x100): |
|
0.211.33
by Jelmer Vernooij
Use common object for writing sha1 files. |
196 |
ret.append(struct.unpack(">L", self._contents[start_offset+i*4:start_offset+(i+1)*4])[0]) |
|
0.211.22
by Jelmer Vernooij
Read fan-out table always, read file sha1. |
197 |
return ret |
198 |
||
|
0.211.23
by Jelmer Vernooij
Add functions for checking checksums of index files. |
199 |
def check(self): |
200 |
"""Check that the stored checksum matches the actual checksum.""" |
|
|
0.211.28
by Jelmer Vernooij
Cope with pack checksum in index file. |
201 |
return self.calculate_checksum() == self.get_stored_checksums()[1] |
|
0.211.23
by Jelmer Vernooij
Add functions for checking checksums of index files. |
202 |
|
|
0.211.28
by Jelmer Vernooij
Cope with pack checksum in index file. |
203 |
def calculate_checksum(self): |
|
0.211.23
by Jelmer Vernooij
Add functions for checking checksums of index files. |
204 |
f = open(self._filename, 'r') |
205 |
try: |
|
|
0.211.24
by Jelmer Vernooij
Add iterentries(). |
206 |
return hashlib.sha1(self._contents[:-20]).digest() |
|
0.211.23
by Jelmer Vernooij
Add functions for checking checksums of index files. |
207 |
finally: |
208 |
f.close() |
|
209 |
||
|
0.211.28
by Jelmer Vernooij
Cope with pack checksum in index file. |
210 |
def get_stored_checksums(self): |
211 |
"""Return the SHA1 checksums stored for the corresponding packfile and |
|
212 |
this header file itself."""
|
|
213 |
return str(self._contents[-40:-20]), str(self._contents[-20:]) |
|
|
0.211.9
by James Westby
Add some basic pack handling code. |
214 |
|
215 |
def object_index(self, sha): |
|
216 |
"""Return the index in to the corresponding packfile for the object. |
|
217 |
||
218 |
Given the name of an object it will return the offset that object lives
|
|
219 |
at within the corresponding pack file. If the pack file doesn't have the
|
|
220 |
object then None will be returned.
|
|
221 |
"""
|
|
222 |
size = os.path.getsize(self._filename) |
|
223 |
assert size == self._size, "Pack index %s has changed size, I don't " \ |
|
224 |
"like that" % self._filename |
|
|
0.211.24
by Jelmer Vernooij
Add iterentries(). |
225 |
return self._object_index(sha) |
|
0.211.9
by James Westby
Add some basic pack handling code. |
226 |
|
|
0.211.24
by Jelmer Vernooij
Add iterentries(). |
227 |
def _object_index(self, hexsha): |
|
0.211.22
by Jelmer Vernooij
Read fan-out table always, read file sha1. |
228 |
"""See object_index""" |
229 |
sha = hex_to_sha(hexsha) |
|
|
0.211.24
by Jelmer Vernooij
Add iterentries(). |
230 |
start = self._fan_out_table[ord(sha[0])-1] |
231 |
end = self._fan_out_table[ord(sha[0])] |
|
|
0.211.22
by Jelmer Vernooij
Read fan-out table always, read file sha1. |
232 |
while start < end: |
233 |
i = (start + end)/2 |
|
|
0.211.31
by Jelmer Vernooij
More work on v2 packs. |
234 |
file_sha = self._unpack_name(i) |
|
0.211.22
by Jelmer Vernooij
Read fan-out table always, read file sha1. |
235 |
if file_sha == sha: |
|
0.211.31
by Jelmer Vernooij
More work on v2 packs. |
236 |
return self._unpack_offset(i) |
|
0.211.22
by Jelmer Vernooij
Read fan-out table always, read file sha1. |
237 |
elif file_sha < sha: |
|
0.211.30
by Jelmer Vernooij
Allow returning CRC32 checksums, as done by pack index v2. |
238 |
start = i + 1 |
|
0.211.22
by Jelmer Vernooij
Read fan-out table always, read file sha1. |
239 |
else: |
|
0.211.30
by Jelmer Vernooij
Allow returning CRC32 checksums, as done by pack index v2. |
240 |
end = i - 1 |
|
0.211.22
by Jelmer Vernooij
Read fan-out table always, read file sha1. |
241 |
return None |
|
0.211.9
by James Westby
Add some basic pack handling code. |
242 |
|
243 |
||
244 |
class PackData(object): |
|
245 |
"""The data contained in a packfile. |
|
246 |
||
247 |
Pack files can be accessed both sequentially for exploding a pack, and
|
|
248 |
directly with the help of an index to retrieve a specific object.
|
|
249 |
||
250 |
The objects within are either complete or a delta aginst another.
|
|
251 |
||
252 |
The header is variable length. If the MSB of each byte is set then it
|
|
253 |
indicates that the subsequent byte is still part of the header.
|
|
254 |
For the first byte the next MS bits are the type, which tells you the type
|
|
255 |
of object, and whether it is a delta. The LS byte is the lowest bits of the
|
|
256 |
size. For each subsequent byte the LS 7 bits are the next MS bits of the
|
|
257 |
size, i.e. the last byte of the header contains the MS bits of the size.
|
|
258 |
||
259 |
For the complete objects the data is stored as zlib deflated data.
|
|
260 |
The size in the header is the uncompressed object size, so to uncompress
|
|
261 |
you need to just keep feeding data to zlib until you get an object back,
|
|
262 |
or it errors on bad data. This is done here by just giving the complete
|
|
263 |
buffer from the start of the deflated object on. This is bad, but until I
|
|
264 |
get mmap sorted out it will have to do.
|
|
265 |
||
266 |
Currently there are no integrity checks done. Also no attempt is made to try
|
|
267 |
and detect the delta case, or a request for an object at the wrong position.
|
|
268 |
It will all just throw a zlib or KeyError.
|
|
269 |
"""
|
|
270 |
||
271 |
def __init__(self, filename): |
|
272 |
"""Create a PackData object that represents the pack in the given filename. |
|
273 |
||
274 |
The file must exist and stay readable until the object is disposed of. It
|
|
275 |
must also stay the same size. It will be mapped whenever needed.
|
|
276 |
||
277 |
Currently there is a restriction on the size of the pack as the python
|
|
278 |
mmap implementation is flawed.
|
|
279 |
"""
|
|
280 |
self._filename = filename |
|
281 |
assert os.path.exists(filename), "%s is not a packfile" % filename |
|
282 |
self._size = os.path.getsize(filename) |
|
|
0.211.21
by Jelmer Vernooij
Read pack header. |
283 |
self._read_header() |
284 |
||
285 |
def _read_header(self): |
|
286 |
f = open(self._filename, 'rb') |
|
287 |
try: |
|
288 |
header = f.read(12) |
|
|
0.211.34
by Jelmer Vernooij
Implement PackData.check(). |
289 |
f.seek(self._size-20) |
290 |
self._stored_checksum = f.read(20) |
|
|
0.211.21
by Jelmer Vernooij
Read pack header. |
291 |
finally: |
292 |
f.close() |
|
293 |
assert header[:4] == "PACK" |
|
294 |
(version,) = struct.unpack_from(">L", header, 4) |
|
295 |
assert version in (2, 3), "Version was %d" % version |
|
296 |
(self._num_objects,) = struct.unpack_from(">L", header, 8) |
|
297 |
||
298 |
def __len__(self): |
|
299 |
"""Returns the number of objects in this pack.""" |
|
300 |
return self._num_objects |
|
|
0.211.9
by James Westby
Add some basic pack handling code. |
301 |
|
|
0.211.34
by Jelmer Vernooij
Implement PackData.check(). |
302 |
def calculate_checksum(self): |
303 |
f = open(self._filename, 'rb') |
|
304 |
try: |
|
305 |
map = simple_mmap(f, 0, self._size) |
|
306 |
return hashlib.sha1(map[:-20]).digest() |
|
307 |
finally: |
|
308 |
f.close() |
|
309 |
||
310 |
def check(self): |
|
311 |
return (self.calculate_checksum() == self._stored_checksum) |
|
312 |
||
|
0.211.9
by James Westby
Add some basic pack handling code. |
313 |
def get_object_at(self, offset): |
314 |
"""Given an offset in to the packfile return the object that is there. |
|
315 |
||
316 |
Using the associated index the location of an object can be looked up, and
|
|
317 |
then the packfile can be asked directly for that object using this
|
|
318 |
function.
|
|
319 |
||
320 |
Currently only non-delta objects are supported.
|
|
321 |
"""
|
|
|
0.211.20
by Jelmer Vernooij
Change project name to dulwich everywhere, add assertion. |
322 |
assert isinstance(offset, long) or isinstance(offset, int) |
|
0.211.9
by James Westby
Add some basic pack handling code. |
323 |
size = os.path.getsize(self._filename) |
324 |
assert size == self._size, "Pack data %s has changed size, I don't " \ |
|
325 |
"like that" % self._filename |
|
326 |
f = open(self._filename, 'rb') |
|
327 |
try: |
|
|
0.211.23
by Jelmer Vernooij
Add functions for checking checksums of index files. |
328 |
map = simple_mmap(f, offset, size-offset) |
|
0.211.19
by Jelmer Vernooij
Provide replacement for mmap()'s offset argument. |
329 |
return self._get_object_at(map) |
|
0.211.9
by James Westby
Add some basic pack handling code. |
330 |
finally: |
331 |
f.close() |
|
332 |
||
|
0.211.19
by Jelmer Vernooij
Provide replacement for mmap()'s offset argument. |
333 |
def _get_object_at(self, map): |
334 |
first_byte = ord(map[0]) |
|
|
0.211.9
by James Westby
Add some basic pack handling code. |
335 |
sign_extend = first_byte & 0x80 |
336 |
type = (first_byte >> 4) & 0x07 |
|
337 |
size = first_byte & 0x0f |
|
338 |
cur_offset = 0 |
|
339 |
while sign_extend > 0: |
|
|
0.211.19
by Jelmer Vernooij
Provide replacement for mmap()'s offset argument. |
340 |
byte = ord(map[cur_offset+1]) |
|
0.211.9
by James Westby
Add some basic pack handling code. |
341 |
sign_extend = byte & 0x80 |
342 |
size_part = byte & 0x7f |
|
343 |
size += size_part << ((cur_offset * 7) + 4) |
|
344 |
cur_offset += 1 |
|
|
0.211.19
by Jelmer Vernooij
Provide replacement for mmap()'s offset argument. |
345 |
raw_base = cur_offset+1 |
|
0.211.9
by James Westby
Add some basic pack handling code. |
346 |
# The size is the inflated size, so we have no idea what the deflated size
|
347 |
# is, so for now give it as much as we have. It should really iterate
|
|
348 |
# feeding it more data if it doesn't decompress, but as we have the whole
|
|
349 |
# thing then just use it.
|
|
350 |
raw = map[raw_base:] |
|
351 |
uncomp = _decompress(raw) |
|
352 |
obj = ShaFile.from_raw_string(type, uncomp) |
|
353 |
return obj |
|
354 |
||
|
0.211.22
by Jelmer Vernooij
Read fan-out table always, read file sha1. |
355 |
|
|
0.211.34
by Jelmer Vernooij
Implement PackData.check(). |
356 |
class SHA1Writer(object): |
|
0.211.33
by Jelmer Vernooij
Use common object for writing sha1 files. |
357 |
|
358 |
def __init__(self, f): |
|
359 |
self.f = f |
|
360 |
self.sha1 = hashlib.sha1("") |
|
361 |
||
362 |
def write(self, data): |
|
363 |
self.sha1.update(data) |
|
364 |
self.f.write(data) |
|
365 |
||
366 |
def close(self): |
|
367 |
sha = self.sha1.digest() |
|
368 |
assert len(sha) == 20 |
|
369 |
self.f.write(sha) |
|
370 |
self.f.close() |
|
371 |
return sha |
|
372 |
||
373 |
||
|
0.211.22
by Jelmer Vernooij
Read fan-out table always, read file sha1. |
374 |
def write_pack(filename, objects): |
375 |
"""Write a new pack file. |
|
376 |
||
377 |
:param filename: The filename of the new pack file.
|
|
378 |
:param objects: List of objects to write.
|
|
|
0.211.33
by Jelmer Vernooij
Use common object for writing sha1 files. |
379 |
:return: List with (name, offset, crc32 checksum) entries, pack checksum
|
|
0.211.22
by Jelmer Vernooij
Read fan-out table always, read file sha1. |
380 |
"""
|
381 |
f = open(filename, 'w') |
|
|
0.211.33
by Jelmer Vernooij
Use common object for writing sha1 files. |
382 |
entries = [] |
383 |
f = SHA1Writer(f) |
|
384 |
f.write("PACK") # Pack header |
|
385 |
f.write(struct.pack(">L", 2)) # Pack version |
|
386 |
f.write(struct.pack(">L", len(objects))) # Number of objects in pack |
|
387 |
for o in objects: |
|
388 |
pass # FIXME: Write object |
|
389 |
return entries, f.close() |
|
|
0.211.25
by Jelmer Vernooij
Implement pack index writing. |
390 |
|
391 |
||
|
0.211.31
by Jelmer Vernooij
More work on v2 packs. |
392 |
def write_pack_index_v1(filename, entries, pack_checksum): |
|
0.211.25
by Jelmer Vernooij
Implement pack index writing. |
393 |
"""Write a new pack index file. |
394 |
||
395 |
:param filename: The filename of the new pack index file.
|
|
|
0.211.30
by Jelmer Vernooij
Allow returning CRC32 checksums, as done by pack index v2. |
396 |
:param entries: List of tuples with object name (sha), offset_in_pack, and
|
397 |
crc32_checksum.
|
|
|
0.211.28
by Jelmer Vernooij
Cope with pack checksum in index file. |
398 |
:param pack_checksum: Checksum of the pack file.
|
|
0.211.25
by Jelmer Vernooij
Implement pack index writing. |
399 |
"""
|
400 |
# Sort entries first
|
|
|
0.211.33
by Jelmer Vernooij
Use common object for writing sha1 files. |
401 |
|
|
0.211.30
by Jelmer Vernooij
Allow returning CRC32 checksums, as done by pack index v2. |
402 |
entries = sorted(entries) |
|
0.211.25
by Jelmer Vernooij
Implement pack index writing. |
403 |
f = open(filename, 'w') |
|
0.211.33
by Jelmer Vernooij
Use common object for writing sha1 files. |
404 |
f = SHA1Writer(f) |
|
0.211.26
by Jelmer Vernooij
Write fan out table correctly. |
405 |
fan_out_table = defaultdict(lambda: 0) |
|
0.211.30
by Jelmer Vernooij
Allow returning CRC32 checksums, as done by pack index v2. |
406 |
for (name, offset, entry_checksum) in entries: |
|
0.211.31
by Jelmer Vernooij
More work on v2 packs. |
407 |
fan_out_table[ord(name[0])] += 1 |
|
0.211.25
by Jelmer Vernooij
Implement pack index writing. |
408 |
# Fan-out table
|
409 |
for i in range(0x100): |
|
|
0.211.33
by Jelmer Vernooij
Use common object for writing sha1 files. |
410 |
f.write(struct.pack(">L", fan_out_table[i])) |
|
0.211.31
by Jelmer Vernooij
More work on v2 packs. |
411 |
fan_out_table[i+1] += fan_out_table[i] |
|
0.211.30
by Jelmer Vernooij
Allow returning CRC32 checksums, as done by pack index v2. |
412 |
for (name, offset, entry_checksum) in entries: |
|
0.211.33
by Jelmer Vernooij
Use common object for writing sha1 files. |
413 |
f.write(struct.pack(">L20s", offset, name)) |
|
0.211.28
by Jelmer Vernooij
Cope with pack checksum in index file. |
414 |
assert len(pack_checksum) == 20 |
|
0.211.33
by Jelmer Vernooij
Use common object for writing sha1 files. |
415 |
f.write(pack_checksum) |
|
0.211.25
by Jelmer Vernooij
Implement pack index writing. |
416 |
f.close() |
|
0.211.31
by Jelmer Vernooij
More work on v2 packs. |
417 |
|
418 |
||
419 |
def write_pack_index_v2(filename, entries, pack_checksum): |
|
420 |
"""Write a new pack index file. |
|
421 |
||
422 |
:param filename: The filename of the new pack index file.
|
|
423 |
:param entries: List of tuples with object name (sha), offset_in_pack, and
|
|
424 |
crc32_checksum.
|
|
425 |
:param pack_checksum: Checksum of the pack file.
|
|
426 |
"""
|
|
427 |
# Sort entries first
|
|
428 |
entries = sorted(entries) |
|
429 |
f = open(filename, 'w') |
|
|
0.211.33
by Jelmer Vernooij
Use common object for writing sha1 files. |
430 |
f = SHA1Writer(f) |
431 |
f.write('\377tOc') |
|
432 |
f.write(struct.pack(">L", 2)) |
|
|
0.211.31
by Jelmer Vernooij
More work on v2 packs. |
433 |
fan_out_table = defaultdict(lambda: 0) |
434 |
for (name, offset, entry_checksum) in entries: |
|
435 |
fan_out_table[ord(name[0])] += 1 |
|
436 |
# Fan-out table
|
|
437 |
for i in range(0x100): |
|
|
0.211.33
by Jelmer Vernooij
Use common object for writing sha1 files. |
438 |
f.write(struct.pack(">L", fan_out_table[i])) |
|
0.211.31
by Jelmer Vernooij
More work on v2 packs. |
439 |
fan_out_table[i+1] += fan_out_table[i] |
440 |
for (name, offset, entry_checksum) in entries: |
|
|
0.211.33
by Jelmer Vernooij
Use common object for writing sha1 files. |
441 |
f.write(name) |
|
0.211.31
by Jelmer Vernooij
More work on v2 packs. |
442 |
for (name, offset, entry_checksum) in entries: |
|
0.211.33
by Jelmer Vernooij
Use common object for writing sha1 files. |
443 |
f.write(struct.pack(">L", entry_checksum)) |
|
0.211.31
by Jelmer Vernooij
More work on v2 packs. |
444 |
for (name, offset, entry_checksum) in entries: |
445 |
# FIXME: handle if MSBit is set in offset
|
|
|
0.211.33
by Jelmer Vernooij
Use common object for writing sha1 files. |
446 |
f.write(struct.pack(">L", offset)) |
|
0.211.31
by Jelmer Vernooij
More work on v2 packs. |
447 |
# FIXME: handle table for pack files > 8 Gb
|
448 |
assert len(pack_checksum) == 20 |
|
|
0.211.33
by Jelmer Vernooij
Use common object for writing sha1 files. |
449 |
f.write(pack_checksum) |
|
0.211.31
by Jelmer Vernooij
More work on v2 packs. |
450 |
f.close() |
451 |