36
36
from collections import defaultdict
38
from itertools import imap, izip
49
from errors import ApplyDeltaError
51
44
supports_mmap_offset = (sys.version_info[0] >= 3 or
52
45
(sys.version_info[0] == 2 and sys.version_info[1] >= 6))
55
def take_msb_bytes(map, offset):
57
while len(ret) == 0 or ret[-1] & 0x80:
58
ret.append(ord(map[offset]))
47
from objects import (ShaFile,
63
51
def read_zlib(data, offset, dec_size):
64
52
obj = zlib.decompressobj()
67
55
while obj.unused_data == "":
69
add = data[base:base+1024]
56
add = data[offset+fed:offset+fed+1024]
71
58
x += obj.decompress(add)
72
59
assert len(x) == dec_size
81
return sha.hexdigest()
84
64
def hex_to_sha(hex):
85
"""Convert a hex string to a binary sha string."""
87
66
for i in range(0, len(hex), 2):
88
67
ret += chr(int(hex[i:i+2], 16))
92
70
def sha_to_hex(sha):
93
"""Convert a binary sha string to a hex sha string."""
96
73
ret += "%02x" % ord(i)
100
76
MAX_MMAP_SIZE = 256 * 1024 * 1024
102
78
def simple_mmap(f, offset, size, access=mmap.ACCESS_READ):
103
"""Simple wrapper for mmap() which always supports the offset parameter.
105
:param f: File object.
106
:param offset: Offset in the file, from the beginning of the file.
107
:param size: Size of the mmap'ed area
108
:param access: Access mechanism.
109
:return: MMAP'd area.
111
79
if offset+size > MAX_MMAP_SIZE and not supports_mmap_offset:
112
80
raise AssertionError("%s is larger than 256 meg, and this version "
113
81
"of Python does not support the offset argument to mmap().")
138
106
return ArraySkipper(mem, offset)
109
def multi_ord(map, start, count):
111
for i in range(count):
112
value = value * 0x100 + ord(map[start+i])
141
116
def resolve_object(offset, type, obj, get_ref, get_offset):
142
"""Resolve an object, possibly resolving deltas when necessary."""
143
if not type in (6, 7): # Not a delta
146
117
if type == 6: # offset delta
147
118
(delta_offset, delta) = obj
148
assert isinstance(delta_offset, int)
149
assert isinstance(delta, str)
150
offset = offset-delta_offset
151
type, base_obj = get_offset(offset)
152
assert isinstance(type, int)
119
base_text = get_offset(offset-delta_offset)
153
121
elif type == 7: # ref delta
154
122
(basename, delta) = obj
155
assert isinstance(basename, str) and len(basename) == 20
156
assert isinstance(delta, str)
157
type, base_obj = get_ref(basename)
158
assert isinstance(type, int)
159
type, base_text = resolve_object(offset, type, base_obj, get_ref, get_offset)
160
return type, apply_delta(base_text, delta)
123
base_text = get_ref(basename)
163
129
class PackIndex(object):
198
165
self._pack_offset_table_offset = self._crc32_table_offset + 4 * len(self)
200
167
def __eq__(self, other):
201
if type(self) != type(other):
204
if self._fan_out_table != other._fan_out_table:
207
for (name1, _, _), (name2, _, _) in izip(self.iterentries(), other.iterentries()):
168
return (type(self) == type(other) and
169
self._fan_out_table == other._fan_out_table and
170
list(self.iterentries()) == list(other.iterentries()))
213
173
self._file.close()
403
357
offset += total_size
406
def iterentries(self, ext_resolve_ref=None):
360
def iterentries(self):
409
postponed = defaultdict(list)
410
class Postpone(Exception):
411
"""Raised to postpone delta resolving."""
413
def get_ref_text(sha):
418
return ext_resolve_ref(sha)
421
raise Postpone, (sha, )
422
todo = list(self.iterobjects())
424
(offset, type, obj) = todo.pop(0)
425
at[offset] = (type, obj)
426
assert isinstance(offset, int)
427
assert isinstance(type, int)
428
assert isinstance(obj, tuple) or isinstance(obj, str)
362
postponed = list(self.iterobjects())
364
(offset, type, obj) = postponed.pop()
430
type, obj = resolve_object(offset, type, obj, get_ref_text,
432
except Postpone, (sha, ):
433
postponed[sha].append((offset, type, obj))
366
type, obj = resolve_object(offset, type, obj, found.__getitem__,
369
postponed.append((offset, type, obj))
435
371
shafile = ShaFile.from_raw_string(type, obj)
436
372
sha = shafile.sha().digest()
437
373
found[sha] = (type, obj)
438
374
yield sha, offset, shafile.crc32()
439
todo += postponed.get(sha, [])
441
raise KeyError([sha_to_hex(h) for h in postponed.keys()])
443
def sorted_entries(self, resolve_ext_ref=None):
444
ret = list(self.iterentries(resolve_ext_ref))
448
376
def create_index_v1(self, filename):
449
entries = self.sorted_entries()
377
entries = list(self.iterentries())
450
378
write_pack_index_v1(filename, entries, self.calculate_checksum())
452
380
def create_index_v2(self, filename):
453
entries = self.sorted_entries()
454
write_pack_index_v2(filename, entries, self.calculate_checksum())
456
def get_stored_checksum(self):
457
return self._stored_checksum
381
entries = list(self.iterentries())
382
write_pack_index_v1(filename, entries, self.calculate_checksum())
460
return (self.calculate_checksum() == self.get_stored_checksum())
385
return (self.calculate_checksum() == self._stored_checksum)
462
387
def get_object_at(self, offset):
463
388
"""Given an offset in to the packfile return the object that is there.
466
391
then the packfile can be asked directly for that object using this
469
assert isinstance(offset, long) or isinstance(offset, int),\
470
"offset was %r" % offset
471
assert offset >= self._header_size
394
assert isinstance(offset, long) or isinstance(offset, int), "offset was %r" % offset
472
395
size = os.path.getsize(self._filename)
473
396
assert size == self._size, "Pack data %s has changed size, I don't " \
474
397
"like that" % self._filename
482
405
def _unpack_object(self, map):
483
bytes = take_msb_bytes(map, 0)
484
type = (bytes[0] >> 4) & 0x07
485
size = bytes[0] & 0x0f
486
for i, byte in enumerate(bytes[1:]):
487
size += (byte & 0x7f) << ((i * 7) + 4)
488
raw_base = len(bytes)
406
first_byte = ord(map[0])
407
sign_extend = first_byte & 0x80
408
type = (first_byte >> 4) & 0x07
409
size = first_byte & 0x0f
411
while sign_extend > 0:
412
byte = ord(map[cur_offset+1])
413
sign_extend = byte & 0x80
414
size_part = byte & 0x7f
415
size += size_part << ((cur_offset * 7) + 4)
417
raw_base = cur_offset+1
489
418
if type == 6: # offset delta
490
bytes = take_msb_bytes(map, raw_base)
491
assert not (bytes[-1] & 0x80)
492
delta_base_offset = bytes[0] & 0x7f
493
for byte in bytes[1:]:
494
delta_base_offset += 1
495
delta_base_offset <<= 7
496
delta_base_offset += (byte & 0x7f)
420
raise AssertionError("OFS_DELTA not yet supported")
498
421
uncomp, comp_len = read_zlib(map, raw_base, size)
499
422
assert size == len(uncomp)
500
return type, (delta_base_offset, uncomp), comp_len+raw_base
423
return type, (uncomp, offset), comp_len+raw_base
501
424
elif type == 7: # ref delta
502
basename = map[raw_base:raw_base+20]
503
uncomp, comp_len = read_zlib(map, raw_base+20, size)
425
basename = map[cur_offset:cur_offset+20]
427
uncomp, comp_len = read_zlib(map, raw_base, size)
504
428
assert size == len(uncomp)
505
return type, (basename, uncomp), comp_len+raw_base+20
429
# text = apply_delta(base, uncomp)
430
return type, (uncomp, basename), comp_len+raw_base
432
# The size is the inflated size, so we have no idea what the deflated size
433
# is, so for now give it as much as we have. It should really iterate
434
# feeding it more data if it doesn't decompress, but as we have the whole
435
# thing then just use it.
507
436
uncomp, comp_len = read_zlib(map, raw_base, size)
508
437
assert len(uncomp) == size
509
438
return type, uncomp, comp_len+raw_base
519
448
self.sha1.update(data)
520
449
self.f.write(data)
523
452
sha = self.sha1.digest()
524
453
assert len(sha) == 20
525
454
self.f.write(sha)
529
sha = self.write_sha()
537
def write_pack_object(f, type, object):
538
"""Write pack object to a file.
540
:param f: File to write to
541
:param o: Object to write
544
if type == 6: # ref delta
545
(delta_base_offset, object) = object
546
elif type == 7: # offset delta
547
(basename, object) = object
549
c = (type << 4) | (size & 15)
552
f.write(chr(c | 0x80))
556
if type == 6: # offset delta
557
ret = [delta_base_offset & 0x7f]
558
delta_base_offset >>= 7
559
while delta_base_offset:
560
delta_base_offset -= 1
561
ret.insert(0, 0x80 | (delta_base_offset & 0x7f))
562
delta_base_offset >>= 7
563
f.write("".join([chr(x) for x in ret]))
564
elif type == 7: # ref delta
565
assert len(basename) == 20
567
f.write(zlib.compress(object))
571
def write_pack(filename, objects, num_objects):
572
f = open(filename + ".pack", 'w')
574
entries, data_sum = write_pack_data(f, objects, num_objects)
578
write_pack_index_v2(filename + ".idx", entries, data_sum)
581
def write_pack_data(f, objects, num_objects):
459
def write_pack(filename, objects):
582
460
"""Write a new pack file.
584
462
:param filename: The filename of the new pack file.
585
463
:param objects: List of objects to write.
586
464
:return: List with (name, offset, crc32 checksum) entries, pack checksum
466
f = open(filename, 'w')
589
468
f = SHA1Writer(f)
590
469
f.write("PACK") # Pack header
591
470
f.write(struct.pack(">L", 2)) # Pack version
592
f.write(struct.pack(">L", num_objects)) # Number of objects in pack
471
f.write(struct.pack(">L", len(objects))) # Number of objects in pack
593
472
for o in objects:
594
sha1 = o.sha().digest()
597
t, o = o.as_raw_string()
598
offset = write_pack_object(f, t, o)
599
entries.append((sha1, offset, crc32))
600
return entries, f.write_sha()
473
pass # FIXME: Write object
474
return entries, f.close()
603
477
def write_pack_index_v1(filename, entries, pack_checksum):
653
527
for i in range(4):
654
528
if cmd & (1 << i):
655
529
x, delta = pop(delta)
656
cp_off |= x << (i * 8)
530
cp_off |= x << (x << (i * 8))
658
532
for i in range(3):
659
if cmd & (1 << (4+i)):
533
if cmd & (1 << (2 << 3+i)):
660
534
x, delta = pop(delta)
661
cp_size |= x << (i * 8)
535
cp_size |= x << (x << (i * 8))
536
if cp_size == 0: cp_size = 0x10000
664
537
if (cp_off + cp_size < cp_size or
665
538
cp_off + cp_size > src_size or
666
539
cp_size > dest_size):
668
out += src_buf[cp_off:cp_off+cp_size]
541
out += text[cp_off:cp_off+cp_size]
673
raise ApplyDeltaError("Invalid opcode 0")
550
raise AssertionError("Invalid opcode 0")
676
raise ApplyDeltaError("delta not empty: %r" % delta)
553
raise AssertionError("data not empty: %r" % data)
678
if dest_size != len(out):
679
raise ApplyDeltaError("dest size incorrect")
556
raise AssertionError("dest size not empty")
703
582
for (name, offset, entry_checksum) in entries:
705
584
for (name, offset, entry_checksum) in entries:
706
f.write(struct.pack(">l", entry_checksum))
585
f.write(struct.pack(">L", entry_checksum))
707
586
for (name, offset, entry_checksum) in entries:
708
587
# FIXME: handle if MSBit is set in offset
709
588
f.write(struct.pack(">L", offset))
718
597
def __init__(self, basename):
719
598
self._basename = basename
720
self._data_path = self._basename + ".pack"
721
self._idx_path = self._basename + ".idx"
726
return self.idx.objects_sha1()
730
if self._data is None:
731
self._data = PackData(self._data_path)
732
assert len(self.idx) == len(self._data)
733
assert self.idx.get_stored_checksums()[0] == self._data.get_stored_checksum()
738
if self._idx is None:
739
self._idx = PackIndex(self._idx_path)
743
if self._data is not None:
747
def __eq__(self, other):
748
return type(self) == type(other) and self.idx == other.idx
599
self._idx = PackIndex(basename + ".idx")
600
self._pack = PackData(basename + ".pack")
601
assert len(self._idx) == len(self._pack)
750
603
def __len__(self):
751
604
"""Number of entries in this pack."""
605
return len(self._idx)
754
607
def __repr__(self):
755
608
return "Pack(%r)" % self._basename
757
610
def __iter__(self):
758
611
"""Iterate over all the sha1s of the objects in this pack."""
759
return iter(self.idx)
612
return iter(self._idx)
762
return self.idx.check() and self.data.check()
764
def get_stored_checksum(self):
765
return self.data.get_stored_checksum()
615
return self._idx.check() and self._pack.check()
767
617
def __contains__(self, sha1):
768
618
"""Check whether this pack contains a particular SHA1."""
769
return (self.idx.object_index(sha1) is not None)
619
return (self._idx.object_index(sha1) is not None)
771
def get_raw(self, sha1, resolve_ref=None):
772
if resolve_ref is None:
773
resolve_ref = self.get_raw
774
offset = self.idx.object_index(sha1)
621
def _get_text(self, sha1):
622
offset = self._idx.object_index(sha1)
775
623
if offset is None:
776
624
raise KeyError(sha1)
778
type, obj = self.data.get_object_at(offset)
779
assert isinstance(offset, int)
780
return resolve_object(offset, type, obj, resolve_ref,
781
self.data.get_object_at)
626
type, obj = self._pack.get_object_at(offset)
627
return resolve_object(offset, type, obj, self._get_text,
628
self._pack.get_object_at)
783
630
def __getitem__(self, sha1):
784
631
"""Retrieve the specified SHA1."""
785
type, uncomp = self.get_raw(sha1)
632
type, uncomp = self._get_text(sha1)
786
633
return ShaFile.from_raw_string(type, uncomp)
788
def iterobjects(self):
789
for offset, type, obj in self.data.iterobjects():
790
assert isinstance(offset, int)
791
yield ShaFile.from_raw_string(
792
*resolve_object(offset, type, obj, self.get_raw,
793
self.data.get_object_at))
796
def load_packs(path):
797
if not os.path.exists(path):
799
for name in os.listdir(path):
800
if name.startswith("pack-") and name.endswith(".pack"):
801
yield Pack(os.path.join(path, name[:-len(".pack")]))