181
106
it whenever required.
183
108
self._filename = filename
109
assert os.path.exists(filename), "%s is not a pack index" % filename
184
110
# Take the size now, so it can be checked each time we map the file to
185
111
# ensure that it hasn't changed.
186
112
self._size = os.path.getsize(filename)
187
self._file = open(filename, 'r')
188
self._contents = simple_mmap(self._file, 0, self._size)
189
if self._contents[:4] != '\377tOc':
191
self._fan_out_table = self._read_fan_out_table(0)
193
(self.version, ) = struct.unpack_from(">L", self._contents, 4)
194
assert self.version in (2,), "Version was %d" % self.version
195
self._fan_out_table = self._read_fan_out_table(8)
196
self._name_table_offset = 8 + 0x100 * 4
197
self._crc32_table_offset = self._name_table_offset + 20 * len(self)
198
self._pack_offset_table_offset = self._crc32_table_offset + 4 * len(self)
200
def __eq__(self, other):
201
if type(self) != type(other):
204
if self._fan_out_table != other._fan_out_table:
207
for (name1, _, _), (name2, _, _) in izip(self.iterentries(), other.iterentries()):
216
"""Return the number of entries in this pack index."""
217
return self._fan_out_table[-1]
219
def _unpack_entry(self, i):
220
"""Unpack the i-th entry in the index file.
222
:return: Tuple with object name (SHA), offset in pack file and
223
CRC32 checksum (if known)."""
224
if self.version == 1:
225
(offset, name) = struct.unpack_from(">L20s", self._contents,
226
(0x100 * 4) + (i * 24))
227
return (name, offset, None)
229
return (self._unpack_name(i), self._unpack_offset(i),
230
self._unpack_crc32_checksum(i))
232
def _unpack_name(self, i):
233
if self.version == 1:
234
return self._unpack_entry(i)[0]
236
return struct.unpack_from("20s", self._contents,
237
self._name_table_offset + i * 20)[0]
239
def _unpack_offset(self, i):
240
if self.version == 1:
241
return self._unpack_entry(i)[1]
243
return struct.unpack_from(">L", self._contents,
244
self._pack_offset_table_offset + i * 4)[0]
246
def _unpack_crc32_checksum(self, i):
247
if self.version == 1:
250
return struct.unpack_from(">L", self._contents,
251
self._crc32_table_offset + i * 4)[0]
254
return imap(sha_to_hex, self._itersha())
257
for i in range(len(self)):
258
yield self._unpack_name(i)
260
def objects_sha1(self):
    """Feed every object SHA in this index through iter_sha1 and return the result."""
    all_shas = self._itersha()
    return iter_sha1(all_shas)
263
def iterentries(self):
264
"""Iterate over the entries in this pack index.
266
Will yield tuples with object name, offset in packfile and crc32 checksum.
268
for i in range(len(self)):
269
yield self._unpack_entry(i)
271
def _read_fan_out_table(self, start_offset):
273
for i in range(0x100):
274
ret.append(struct.unpack(">L", self._contents[start_offset+i*4:start_offset+(i+1)*4])[0])
278
"""Check that the stored checksum matches the actual checksum."""
279
return self.calculate_checksum() == self.get_stored_checksums()[1]
281
def calculate_checksum(self):
282
f = open(self._filename, 'r')
284
return hashlib.sha1(self._contents[:-20]).digest()
288
def get_stored_checksums(self):
    """Return the pair of SHA1 checksums stored in this index file.

    The first element is the checksum recorded for the corresponding
    packfile, the second the checksum of this index file itself; both
    are taken from the trailing 40 bytes of the mapped contents.
    """
    pack_sum = str(self._contents[-40:-20])
    idx_sum = str(self._contents[-20:])
    return pack_sum, idx_sum
113
assert self._size > self.header_size, "%s is too small to be a packfile" % \
293
116
def object_index(self, sha):
294
117
"""Return the index in to the corresponding packfile for the object.
364
189
self._filename = filename
365
190
assert os.path.exists(filename), "%s is not a packfile" % filename
366
191
self._size = os.path.getsize(filename)
367
self._header_size = 12
368
assert self._size >= self._header_size, "%s is too small for a packfile" % filename
371
def _read_header(self):
372
f = open(self._filename, 'rb')
375
f.seek(self._size-20)
376
self._stored_checksum = f.read(20)
379
assert header[:4] == "PACK"
380
(version,) = struct.unpack_from(">L", header, 4)
381
assert version in (2, 3), "Version was %d" % version
382
(self._num_objects,) = struct.unpack_from(">L", header, 8)
385
"""Returns the number of objects in this pack."""
386
return self._num_objects
388
def calculate_checksum(self):
389
f = open(self._filename, 'rb')
391
map = simple_mmap(f, 0, self._size)
392
return hashlib.sha1(map[:-20]).digest()
396
def iterobjects(self):
397
offset = self._header_size
398
f = open(self._filename, 'rb')
399
for i in range(len(self)):
400
map = simple_mmap(f, offset, self._size-offset)
401
(type, obj, total_size) = self._unpack_object(map)
402
yield offset, type, obj
406
def iterentries(self, ext_resolve_ref=None):
409
postponed = defaultdict(list)
410
class Postpone(Exception):
411
"""Raised to postpone delta resolving."""
413
def get_ref_text(sha):
418
return ext_resolve_ref(sha)
421
raise Postpone, (sha, )
422
todo = list(self.iterobjects())
424
(offset, type, obj) = todo.pop(0)
425
at[offset] = (type, obj)
426
assert isinstance(offset, int)
427
assert isinstance(type, int)
428
assert isinstance(obj, tuple) or isinstance(obj, str)
430
type, obj = resolve_object(offset, type, obj, get_ref_text,
432
except Postpone, (sha, ):
433
postponed[sha].append((offset, type, obj))
435
shafile = ShaFile.from_raw_string(type, obj)
436
sha = shafile.sha().digest()
437
found[sha] = (type, obj)
438
yield sha, offset, shafile.crc32()
439
todo += postponed.get(sha, [])
441
raise KeyError([sha_to_hex(h) for h in postponed.keys()])
443
def sorted_entries(self, resolve_ext_ref=None):
444
ret = list(self.iterentries(resolve_ext_ref))
448
def create_index_v1(self, filename):
    """Write a version 1 pack index for this pack data to `filename`."""
    write_pack_index_v1(filename, self.sorted_entries(),
                        self.calculate_checksum())
452
def create_index_v2(self, filename):
    """Write a version 2 pack index for this pack data to `filename`."""
    write_pack_index_v2(filename, self.sorted_entries(),
                        self.calculate_checksum())
456
def get_stored_checksum(self):
    """Return the checksum that was read from the trailer of the pack file."""
    stored = self._stored_checksum
    return stored
460
return (self.calculate_checksum() == self.get_stored_checksum())
462
193
def get_object_at(self, offset):
463
194
"""Given an offset in to the packfile return the object that is there.
465
196
Using the associated index the location of an object can be looked up, and
466
197
then the packfile can be asked directly for that object using this
200
Currently only non-delta objects are supported.
469
assert isinstance(offset, long) or isinstance(offset, int),\
470
"offset was %r" % offset
471
assert offset >= self._header_size
202
assert isinstance(offset, long) or isinstance(offset, int)
472
203
size = os.path.getsize(self._filename)
473
204
assert size == self._size, "Pack data %s has changed size, I don't " \
474
205
"like that" % self._filename
475
206
f = open(self._filename, 'rb')
477
map = simple_mmap(f, offset, size-offset)
478
return self._unpack_object(map)[:2]
208
map = simple_mmap(f, offset, size)
209
return self._get_object_at(map)
482
def _unpack_object(self, map):
483
bytes = take_msb_bytes(map, 0)
484
type = (bytes[0] >> 4) & 0x07
485
size = bytes[0] & 0x0f
486
for i, byte in enumerate(bytes[1:]):
487
size += (byte & 0x7f) << ((i * 7) + 4)
488
raw_base = len(bytes)
489
if type == 6: # offset delta
490
bytes = take_msb_bytes(map, raw_base)
491
assert not (bytes[-1] & 0x80)
492
delta_base_offset = bytes[0] & 0x7f
493
for byte in bytes[1:]:
494
delta_base_offset += 1
495
delta_base_offset <<= 7
496
delta_base_offset += (byte & 0x7f)
498
uncomp, comp_len = read_zlib(map, raw_base, size)
499
assert size == len(uncomp)
500
return type, (delta_base_offset, uncomp), comp_len+raw_base
501
elif type == 7: # ref delta
502
basename = map[raw_base:raw_base+20]
503
uncomp, comp_len = read_zlib(map, raw_base+20, size)
504
assert size == len(uncomp)
505
return type, (basename, uncomp), comp_len+raw_base+20
507
uncomp, comp_len = read_zlib(map, raw_base, size)
508
assert len(uncomp) == size
509
return type, uncomp, comp_len+raw_base
512
class SHA1Writer(object):
514
def __init__(self, f):
516
self.sha1 = hashlib.sha1("")
518
def write(self, data):
519
self.sha1.update(data)
523
sha = self.sha1.digest()
524
assert len(sha) == 20
529
sha = self.write_sha()
537
def write_pack_object(f, type, object):
538
"""Write pack object to a file.
540
:param f: File to write to
541
:param o: Object to write
544
if type == 6: # ref delta
545
(delta_base_offset, object) = object
546
elif type == 7: # offset delta
547
(basename, object) = object
549
c = (type << 4) | (size & 15)
552
f.write(chr(c | 0x80))
556
if type == 6: # offset delta
557
ret = [delta_base_offset & 0x7f]
558
delta_base_offset >>= 7
559
while delta_base_offset:
560
delta_base_offset -= 1
561
ret.insert(0, 0x80 | (delta_base_offset & 0x7f))
562
delta_base_offset >>= 7
563
f.write("".join([chr(x) for x in ret]))
564
elif type == 7: # ref delta
565
assert len(basename) == 20
567
f.write(zlib.compress(object))
571
def write_pack(filename, objects, num_objects):
572
f = open(filename + ".pack", 'w')
574
entries, data_sum = write_pack_data(f, objects, num_objects)
578
write_pack_index_v2(filename + ".idx", entries, data_sum)
581
def write_pack_data(f, objects, num_objects):
582
"""Write a new pack file.
584
:param filename: The filename of the new pack file.
585
:param objects: List of objects to write.
586
:return: List with (name, offset, crc32 checksum) entries, pack checksum
590
f.write("PACK") # Pack header
591
f.write(struct.pack(">L", 2)) # Pack version
592
f.write(struct.pack(">L", num_objects)) # Number of objects in pack
594
sha1 = o.sha().digest()
597
t, o = o.as_raw_string()
598
offset = write_pack_object(f, t, o)
599
entries.append((sha1, offset, crc32))
600
return entries, f.write_sha()
603
def write_pack_index_v1(filename, entries, pack_checksum):
604
"""Write a new pack index file.
606
:param filename: The filename of the new pack index file.
607
:param entries: List of tuples with object name (sha), offset_in_pack, and
609
:param pack_checksum: Checksum of the pack file.
611
f = open(filename, 'w')
613
fan_out_table = defaultdict(lambda: 0)
614
for (name, offset, entry_checksum) in entries:
615
fan_out_table[ord(name[0])] += 1
617
for i in range(0x100):
618
f.write(struct.pack(">L", fan_out_table[i]))
619
fan_out_table[i+1] += fan_out_table[i]
620
for (name, offset, entry_checksum) in entries:
621
f.write(struct.pack(">L20s", offset, name))
622
assert len(pack_checksum) == 20
623
f.write(pack_checksum)
627
def apply_delta(src_buf, delta):
628
"""Based on the similar function in git's patch-delta.c."""
629
assert isinstance(src_buf, str), "was %r" % (src_buf,)
630
assert isinstance(delta, str)
635
return ord(ret), delta
636
def get_delta_header_size(delta):
640
cmd, delta = pop(delta)
641
size |= (cmd & ~0x80) << i
646
src_size, delta = get_delta_header_size(delta)
647
dest_size, delta = get_delta_header_size(delta)
648
assert src_size == len(src_buf)
650
cmd, delta = pop(delta)
655
x, delta = pop(delta)
656
cp_off |= x << (i * 8)
659
if cmd & (1 << (4+i)):
660
x, delta = pop(delta)
661
cp_size |= x << (i * 8)
664
if (cp_off + cp_size < cp_size or
665
cp_off + cp_size > src_size or
666
cp_size > dest_size):
668
out += src_buf[cp_off:cp_off+cp_size]
673
raise ApplyDeltaError("Invalid opcode 0")
676
raise ApplyDeltaError("delta not empty: %r" % delta)
678
if dest_size != len(out):
679
raise ApplyDeltaError("dest size incorrect")
684
def write_pack_index_v2(filename, entries, pack_checksum):
685
"""Write a new pack index file.
687
:param filename: The filename of the new pack index file.
688
:param entries: List of tuples with object name (sha), offset_in_pack, and
690
:param pack_checksum: Checksum of the pack file.
692
f = open(filename, 'w')
694
f.write('\377tOc') # Magic!
695
f.write(struct.pack(">L", 2))
696
fan_out_table = defaultdict(lambda: 0)
697
for (name, offset, entry_checksum) in entries:
698
fan_out_table[ord(name[0])] += 1
700
for i in range(0x100):
701
f.write(struct.pack(">L", fan_out_table[i]))
702
fan_out_table[i+1] += fan_out_table[i]
703
for (name, offset, entry_checksum) in entries:
705
for (name, offset, entry_checksum) in entries:
706
f.write(struct.pack(">l", entry_checksum))
707
for (name, offset, entry_checksum) in entries:
708
# FIXME: handle if MSBit is set in offset
709
f.write(struct.pack(">L", offset))
710
# FIXME: handle table for pack files > 8 Gb
711
assert len(pack_checksum) == 20
712
f.write(pack_checksum)
718
def __init__(self, basename):
719
self._basename = basename
720
self._data_path = self._basename + ".pack"
721
self._idx_path = self._basename + ".idx"
726
return self.idx.objects_sha1()
730
if self._data is None:
731
self._data = PackData(self._data_path)
732
assert len(self.idx) == len(self._data)
733
assert self.idx.get_stored_checksums()[0] == self._data.get_stored_checksum()
738
if self._idx is None:
739
self._idx = PackIndex(self._idx_path)
743
if self._data is not None:
747
def __eq__(self, other):
    """Two packs compare equal when they share a type and their indices match."""
    if type(self) != type(other):
        return False
    return self.idx == other.idx
751
"""Number of entries in this pack."""
755
return "Pack(%r)" % self._basename
758
"""Iterate over all the sha1s of the objects in this pack."""
759
return iter(self.idx)
762
return self.idx.check() and self.data.check()
764
def get_stored_checksum(self):
    """Return the stored checksum of the underlying pack data file."""
    pack_data = self.data
    return pack_data.get_stored_checksum()
767
def __contains__(self, sha1):
    """Check whether this pack contains a particular SHA1.

    True when the index can resolve `sha1` to a pack offset.
    """
    found = self.idx.object_index(sha1)
    return found is not None
771
def get_raw(self, sha1, resolve_ref=None):
772
if resolve_ref is None:
773
resolve_ref = self.get_raw
774
offset = self.idx.object_index(sha1)
778
type, obj = self.data.get_object_at(offset)
779
assert isinstance(offset, int)
780
return resolve_object(offset, type, obj, resolve_ref,
781
self.data.get_object_at)
783
def __getitem__(self, sha1):
    """Retrieve the object named by `sha1` as a ShaFile instance."""
    obj_type, raw = self.get_raw(sha1)
    return ShaFile.from_raw_string(obj_type, raw)
788
def iterobjects(self):
    """Yield every object stored in this pack as a ShaFile, resolving deltas."""
    for offset, obj_type, obj in self.data.iterobjects():
        # Offsets produced by the data file are expected to be plain ints.
        assert isinstance(offset, int)
        resolved = resolve_object(offset, obj_type, obj, self.get_raw,
                                  self.data.get_object_at)
        yield ShaFile.from_raw_string(*resolved)
796
def load_packs(path):
797
if not os.path.exists(path):
799
for name in os.listdir(path):
800
if name.startswith("pack-") and name.endswith(".pack"):
801
yield Pack(os.path.join(path, name[:-len(".pack")]))
213
def _get_object_at(self, map):
214
first_byte = ord(map[0])
215
sign_extend = first_byte & 0x80
216
type = (first_byte >> 4) & 0x07
217
size = first_byte & 0x0f
219
while sign_extend > 0:
220
byte = ord(map[cur_offset+1])
221
sign_extend = byte & 0x80
222
size_part = byte & 0x7f
223
size += size_part << ((cur_offset * 7) + 4)
225
raw_base = cur_offset+1
226
# The size is the inflated size, so we have no idea what the deflated size
227
# is, so for now give it as much as we have. It should really iterate
228
# feeding it more data if it doesn't decompress, but as we have the whole
229
# thing then just use it.
231
uncomp = _decompress(raw)
232
obj = ShaFile.from_raw_string(type, uncomp)