import mmap
import os
import struct
import sys
import zlib

from collections import defaultdict
from itertools import imap, izip

from errors import ApplyDeltaError

supports_mmap_offset = (sys.version_info[0] >= 3 or
        (sys.version_info[0] == 2 and sys.version_info[1] >= 6))


def take_msb_bytes(map, offset):
    ret = []
    while len(ret) == 0 or ret[-1] & 0x80:
        ret.append(ord(map[offset]))
        offset += 1
    return ret


def read_zlib(data, offset, dec_size):
    obj = zlib.decompressobj()
    x = ""
    fed = 0
    while obj.unused_data == "":
        base = offset+fed
        add = data[base:base+1024]
        fed += len(add)
        x += obj.decompress(add)
    assert len(x) == dec_size
    comp_len = fed-len(obj.unused_data)
    return x, comp_len


from hashlib import sha1  # assumed import for the reconstructed helper below


def iter_sha1(iter):
    # Only the return line of this helper survived the excerpt; the name and the
    # lines above the return are an assumed reconstruction (hash the names and
    # return the hex digest).
    sha = sha1()
    for name in iter:
        sha.update(name)
    return sha.hexdigest()


from objects import ShaFile


def hex_to_sha(hex):
    """Convert a hex string to a binary sha string."""
    ret = ""
    for i in range(0, len(hex), 2):
        ret += chr(int(hex[i:i+2], 16))
    return ret


def sha_to_hex(sha):
    """Convert a binary sha string to a hex sha string."""
    ret = ""
    for i in sha:
        ret += "%02x" % ord(i)
    return ret


MAX_MMAP_SIZE = 256 * 1024 * 1024


def simple_mmap(f, offset, size, access=mmap.ACCESS_READ):
    """Simple wrapper for mmap() which always supports the offset parameter.

    :param f: File object.
    :param offset: Offset in the file, from the beginning of the file.
    :param size: Size of the mmap'ed area
    :param access: Access mechanism.
    :return: MMAP'd area.
    """
    if offset+size > MAX_MMAP_SIZE and not supports_mmap_offset:
        raise AssertionError("%s is larger than 256 meg, and this version "
            "of Python does not support the offset argument to mmap()."
            % (offset+size))
    # Fallback that maps from the start of the file and skips to offset.
    return ArraySkipper(mem, offset)
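
# Hedged usage sketch: map the start of a pack file and check its signature.
# The file name is illustrative; the 12-byte figure covers the "PACK" magic,
# the version and the object count written by write_pack_data below.
#   f = open("foo.pack", "rb")
#   header = simple_mmap(f, 0, 12)
#   assert header[:4] == "PACK"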


def resolve_object(offset, type, obj, get_ref, get_offset):
    """Resolve an object, possibly resolving deltas when necessary."""
    if not type in (6, 7): # Not a delta
        return type, obj
    if type == 6: # offset delta
        (delta_offset, delta) = obj
        assert isinstance(delta_offset, int)
        assert isinstance(delta, str)
        offset = offset-delta_offset
        type, base_obj = get_offset(offset)
        assert isinstance(type, int)
    elif type == 7: # ref delta
        (basename, delta) = obj
        assert isinstance(basename, str) and len(basename) == 20
        assert isinstance(delta, str)
        type, base_obj = get_ref(basename)
        assert isinstance(type, int)
    type, base_text = resolve_object(offset, type, base_obj, get_ref, get_offset)
    return type, apply_delta(base_text, delta)
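

def _example_resolve_plain():
    # Illustrative sketch, not part of the original module: for a non-delta
    # object (3 is the pack type number for a blob) resolve_object is a
    # pass-through and never calls the lookup callbacks.
    return resolve_object(0, 3, "hello", None, None)   # -> (3, "hello")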


def multi_ord(map, start, count):
    value = 0
    for i in range(count):
        value = value * 0x100 + ord(map[start+i])
    return value
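
# e.g. multi_ord("\x01\x02\x03", 0, 2) == 0x0102 -- a big-endian read of the
# first two bytes (the input string here is just an illustration).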


class PackIndex(object):

    def object_index(self, sha):
        size = os.path.getsize(self._filename)
        assert size == self._size, "Pack index %s has changed size, I don't " \
             "like that" % self._filename
        sha = hex_to_sha(sha)
        return self._object_index(sha)

    def _object_index(self, sha):
        """See object_index"""
        idx = ord(sha[0])
        start = self._fan_out_table[idx-1]
        end = self._fan_out_table[idx]
        # Bisect between start and end in the sorted list of names.
        i = (start + end)/2
        file_sha = self._unpack_name(i)
        return self._unpack_offset(i)


class PackData(object):

    def iterobjects(self):
        offset = self._header_size
        f = open(self._filename, 'rb')
        for i in range(len(self)):
            map = simple_mmap(f, offset, self._size-offset)
            (type, obj, total_size) = self._unpack_object(map)
            yield offset, type, obj
            offset += total_size

    def iterentries(self, ext_resolve_ref=None):
        found = {}
        at = {}
        postponed = defaultdict(list)
        class Postpone(Exception):
            """Raised to postpone delta resolving."""

        def get_ref_text(sha):
            if sha in found:
                return found[sha]
            if ext_resolve_ref is not None:
                try:
                    return ext_resolve_ref(sha)
                except KeyError:
                    pass
            raise Postpone, (sha, )
        todo = list(self.iterobjects())
        while todo:
            (offset, type, obj) = todo.pop(0)
            at[offset] = (type, obj)
            assert isinstance(offset, int)
            assert isinstance(type, int)
            assert isinstance(obj, tuple) or isinstance(obj, str)
            try:
                type, obj = resolve_object(offset, type, obj, get_ref_text,
                    at.__getitem__)  # assumed: look up earlier objects by offset
            except Postpone, (sha, ):
                postponed[sha].append((offset, type, obj))
            else:
                shafile = ShaFile.from_raw_string(type, obj)
                sha = shafile.sha().digest()
                found[sha] = (type, obj)
                yield sha, offset, shafile.crc32()
                todo += postponed.get(sha, [])
        if postponed:
            raise KeyError([sha_to_hex(h) for h in postponed.keys()])

    def sorted_entries(self, resolve_ext_ref=None):
        ret = list(self.iterentries(resolve_ext_ref))
        ret.sort()
        return ret

    def create_index_v1(self, filename):
        entries = self.sorted_entries()
        write_pack_index_v1(filename, entries, self.calculate_checksum())

    def create_index_v2(self, filename):
        entries = self.sorted_entries()
        write_pack_index_v2(filename, entries, self.calculate_checksum())

    def get_stored_checksum(self):
        return self._stored_checksum

    def check(self):
        return (self.calculate_checksum() == self.get_stored_checksum())

    def get_object_at(self, offset):
        """Given an offset in to the packfile return the object that is there.

        Using the associated index the location of an object can be looked up, and
        then the packfile can be asked directly for that object using this
        function.
        """
        assert isinstance(offset, long) or isinstance(offset, int),\
                "offset was %r" % offset
        assert offset >= self._header_size
        size = os.path.getsize(self._filename)
        assert size == self._size, "Pack data %s has changed size, I don't " \
             "like that" % self._filename
        f = open(self._filename, 'rb')
        map = simple_mmap(f, offset, size-offset)
        return self._unpack_object(map)[:2]

    def _unpack_object(self, map):
        bytes = take_msb_bytes(map, 0)
        type = (bytes[0] >> 4) & 0x07
        size = bytes[0] & 0x0f
        for i, byte in enumerate(bytes[1:]):
            size += (byte & 0x7f) << ((i * 7) + 4)
        raw_base = len(bytes)
        if type == 6: # offset delta
            bytes = take_msb_bytes(map, raw_base)
            assert not (bytes[-1] & 0x80)
            delta_base_offset = bytes[0] & 0x7f
            for byte in bytes[1:]:
                delta_base_offset += 1
                delta_base_offset <<= 7
                delta_base_offset += (byte & 0x7f)
            raw_base += len(bytes)
            uncomp, comp_len = read_zlib(map, raw_base, size)
            assert size == len(uncomp)
            return type, (delta_base_offset, uncomp), comp_len+raw_base
        elif type == 7: # ref delta
            basename = map[raw_base:raw_base+20]
            uncomp, comp_len = read_zlib(map, raw_base+20, size)
            assert size == len(uncomp)
            return type, (basename, uncomp), comp_len+raw_base+20
        else:
            uncomp, comp_len = read_zlib(map, raw_base, size)
            assert len(uncomp) == size
            return type, uncomp, comp_len+raw_base
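
    # Illustrative sketch (made-up bytes): how the variable-length size header
    # at the top of _unpack_object decodes a commit (type 1) of size 300.
    #   bytes = take_msb_bytes("\x9c\x12", 0)                  # -> [0x9c, 0x12]
    #   type = (bytes[0] >> 4) & 0x07                          # -> 1
    #   size = (bytes[0] & 0x0f) + ((bytes[1] & 0x7f) << 4)    # -> 300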

    def _get_object_at(self, map):
        first_byte = ord(map[0])
        sign_extend = first_byte & 0x80
        type = (first_byte >> 4) & 0x07
        size = first_byte & 0x0f
        cur_offset = 0
        while sign_extend > 0:
            byte = ord(map[cur_offset+1])
            sign_extend = byte & 0x80
            size_part = byte & 0x7f
            size += size_part << ((cur_offset * 7) + 4)
            cur_offset += 1
        raw_base = cur_offset+1
        # The size is the inflated size, so we have no idea what the deflated size
        # is, so for now give it as much as we have. It should really iterate
        # feeding it more data if it doesn't decompress, but as we have the whole
        # thing then just use it.
        raw = map[raw_base:]
        uncomp = _decompress(raw)
        obj = ShaFile.from_raw_string(type, uncomp)
        return obj


class SHA1Writer(object):

    def write(self, data):
        self.sha1.update(data)
        self.f.write(data)

    def write_sha(self):
        sha = self.sha1.digest()
        assert len(sha) == 20
        self.f.write(sha)
        return sha

    def close(self):
        sha = self.write_sha()
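
# Hedged usage sketch: wrap a file so that a trailing SHA-1 of everything
# written is appended at the end (the file name is illustrative).
#   f = SHA1Writer(open("foo.pack", "wb"))
#   f.write("PACK")
#   f.write_sha()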


def write_pack_object(f, type, object):
    """Write pack object to a file.

    :param f: File to write to
    :param type: Numeric type of the object
    :param object: Object to write
    """
    if type == 6: # offset delta
        (delta_base_offset, object) = object
    elif type == 7: # ref delta
        (basename, object) = object
    size = len(object)
    # Variable-length size header: the low 4 bits of the size share the first
    # byte with the type; the rest follows 7 bits per byte, MSB as continuation.
    c = (type << 4) | (size & 15)
    size >>= 4
    while size:
        f.write(chr(c | 0x80))
        c = size & 0x7f
        size >>= 7
    f.write(chr(c))
    if type == 6: # offset delta
        ret = [delta_base_offset & 0x7f]
        delta_base_offset >>= 7
        while delta_base_offset:
            delta_base_offset -= 1
            ret.insert(0, 0x80 | (delta_base_offset & 0x7f))
            delta_base_offset >>= 7
        f.write("".join([chr(x) for x in ret]))
    elif type == 7: # ref delta
        assert len(basename) == 20
        f.write(basename)
    f.write(zlib.compress(object))
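
# Hedged usage sketch: write a blob ("hello") into an in-memory buffer;
# 3 is the pack type number for a blob in git's pack format.
#   from cStringIO import StringIO
#   buf = StringIO()
#   write_pack_object(buf, 3, "hello")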


def write_pack(filename, objects, num_objects):
    f = open(filename + ".pack", 'w')
    entries, data_sum = write_pack_data(f, objects, num_objects)
    write_pack_index_v2(filename + ".idx", entries, data_sum)


def write_pack_data(f, objects, num_objects):
    """Write a new pack file.

    :param f: File to write to.
    :param objects: List of objects to write.
    :return: List with (name, offset, crc32 checksum) entries, pack checksum
    """
    entries = []
    f = SHA1Writer(f)
    f.write("PACK")                          # Pack header
    f.write(struct.pack(">L", 2))            # Pack version
    f.write(struct.pack(">L", num_objects))  # Number of objects in pack
    for o in objects:
        sha1 = o.sha().digest()
        crc32 = o.crc32()
        t, o = o.as_raw_string()
        offset = write_pack_object(f, t, o)
        entries.append((sha1, offset, crc32))
    return entries, f.write_sha()


def write_pack_index_v1(filename, entries, pack_checksum):
    """Write a new pack index file."""


def apply_delta(src_buf, delta):
    """Based on the similar function in git's patch-delta.c."""
    assert isinstance(src_buf, str), "was %r" % (src_buf,)
    assert isinstance(delta, str)
    out = ""
    def pop(delta):
        ret = delta[0]
        delta = delta[1:]
        return ord(ret), delta
    def get_delta_header_size(delta):
        size = 0
        i = 0
        while True:
            cmd, delta = pop(delta)
            size |= (cmd & ~0x80) << i
            i += 7
            if not cmd & 0x80:
                break
        return size, delta
    src_size, delta = get_delta_header_size(delta)
    dest_size, delta = get_delta_header_size(delta)
    assert src_size == len(src_buf)
    while delta:
        cmd, delta = pop(delta)
        if cmd & 0x80:
            # Copy opcode: optional offset and size bytes selected by the cmd bits.
            cp_off = 0
            for i in range(4):
                if cmd & (1 << i):
                    x, delta = pop(delta)
                    cp_off |= x << (i * 8)
            cp_size = 0
            for i in range(3):
                if cmd & (1 << (4+i)):
                    x, delta = pop(delta)
                    cp_size |= x << (i * 8)
            if (cp_off + cp_size < cp_size or
                cp_off + cp_size > src_size or
                cp_size > dest_size):
                break
            out += src_buf[cp_off:cp_off+cp_size]
        elif cmd != 0:
            # Insert opcode: the next cmd bytes are copied literally.
            out += delta[:cmd]
            delta = delta[cmd:]
        else:
            raise ApplyDeltaError("Invalid opcode 0")
    if delta != "":
        raise ApplyDeltaError("delta not empty: %r" % delta)
    if dest_size != len(out):
        raise ApplyDeltaError("dest size incorrect")
    return out


def write_pack_index_v2(filename, entries, pack_checksum):
    """Write a new pack index file.

    :param filename: The filename of the new pack index file.
    :param entries: List of (name, offset, crc32 checksum) tuples.
    :param pack_checksum: Checksum of the pack file.
    """
    for (name, offset, entry_checksum) in entries:
        f.write(name)
    for (name, offset, entry_checksum) in entries:
        f.write(struct.pack(">l", entry_checksum))
    for (name, offset, entry_checksum) in entries:
        # FIXME: handle if MSBit is set in offset
        f.write(struct.pack(">L", offset))
    f.write(pack_checksum)


class Pack(object):

    def __init__(self, basename):
        self._basename = basename
        self._data_path = self._basename + ".pack"
        self._idx_path = self._basename + ".idx"
        self._data = None
        self._idx = None

    def name(self):  # method name assumed; only the return below survived
        return self.idx.objects_sha1()

    @property
    def data(self):
        if self._data is None:
            self._data = PackData(self._data_path)
            assert len(self.idx) == len(self._data)
            assert self.idx.get_stored_checksums()[0] == self._data.get_stored_checksum()
        return self._data

    @property
    def idx(self):
        if self._idx is None:
            self._idx = PackIndex(self._idx_path)
        return self._idx

    def close(self):  # method name assumed; only the check below survived
        if self._data is not None:
            self._data.close()

    def __eq__(self, other):
        return type(self) == type(other) and self.idx == other.idx

    def __len__(self):
        """Number of entries in this pack."""
        return len(self.idx)

    def __repr__(self):
        return "Pack(%r)" % self._basename

    def __iter__(self):
        """Iterate over all the sha1s of the objects in this pack."""
        return iter(self.idx)

    def check(self):
        return self.idx.check() and self.data.check()

    def get_stored_checksum(self):
        return self.data.get_stored_checksum()

    def __contains__(self, sha1):
        """Check whether this pack contains a particular SHA1."""
        return (self.idx.object_index(sha1) is not None)

    def get_raw(self, sha1, resolve_ref=None):
        if resolve_ref is None:
            resolve_ref = self.get_raw
        offset = self.idx.object_index(sha1)
        type, obj = self.data.get_object_at(offset)
        assert isinstance(offset, int)
        return resolve_object(offset, type, obj, resolve_ref,
            self.data.get_object_at)

    def __getitem__(self, sha1):
        """Retrieve the specified SHA1."""
        type, uncomp = self.get_raw(sha1)
        return ShaFile.from_raw_string(type, uncomp)

    def iterobjects(self):
        for offset, type, obj in self.data.iterobjects():
            assert isinstance(offset, int)
            yield ShaFile.from_raw_string(
                *resolve_object(offset, type, obj, self.get_raw,
                    self.data.get_object_at))
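
# Hedged usage sketch: open an on-disk pack by its basename (no suffix) and
# fetch one object by hex sha; both names below are illustrative.
#   p = Pack("pack-abc123")
#   if some_hexsha in p:
#       obj = p[some_hexsha]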


def load_packs(path):
    if not os.path.exists(path):
        return
    for name in os.listdir(path):
        if name.startswith("pack-") and name.endswith(".pack"):
            yield Pack(os.path.join(path, name[:-len(".pack")]))
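
# Hedged usage sketch: list the packs under a repository's pack directory
# (the path below is illustrative).
#   for pack in load_packs(".git/objects/pack"):
#       print pack, len(pack)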