84
81
return sha.hexdigest()
85
"""Convert a hex string to a binary sha string."""
87
for i in range(0, len(hex), 2):
88
ret += chr(int(hex[i:i+2], 16))
93
"""Convert a binary sha string to a hex sha string."""
96
ret += "%02x" % ord(i)
87
100
MAX_MMAP_SIZE = 256 * 1024 * 1024
89
102
def simple_mmap(f, offset, size, access=mmap.ACCESS_READ):
315
def read_pack_header(f):
# Validate a pack file header and return (version, num_objects).
# NOTE(review): the statement that fills `header` (source line 316) is not
# part of this excerpt; presumably it is read from `f` -- confirm.
317
# The first four bytes must be the literal "PACK".
assert header[:4] == "PACK"
318
# Version: big-endian unsigned 32-bit integer at byte offset 4.
(version,) = struct.unpack_from(">L", header, 4)
319
# Only versions 2 and 3 are accepted.
assert version in (2, 3), "Version was %d" % version
320
# Object count: big-endian unsigned 32-bit integer at byte offset 8.
(num_objects,) = struct.unpack_from(">L", header, 8)
321
return (version, num_objects)
324
def read_pack_tail(f):
328
def unpack_object(map):
# Decode the single pack entry that starts at offset 0 of `map`.
#
# Returns a 3-tuple (type, payload, consumed):
#   * type 6 (offset delta): payload is (delta_base_offset, uncomp)
#   * type 7 (ref delta):    payload is (basename, uncomp), where basename
#                            is the 20 bytes that follow the entry header
#   * any other type:        payload is uncomp
# `consumed` is the compressed length reported by read_zlib plus the number
# of header bytes that precede the compressed data.
#
# NOTE(review): source lines 343 and 352 are not part of this excerpt, so
# the statements there are not documented here.
329
# Entry header bytes; presumably take_msb_bytes returns the run of bytes up
# to and including the first one whose high bit is clear -- confirm.
bytes = take_msb_bytes(map, 0)
330
# Bits 4-6 of the first byte carry the object type.
type = (bytes[0] >> 4) & 0x07
331
# The low four bits of the first byte are the least significant size bits.
size = bytes[0] & 0x0f
332
# Each following byte contributes seven more size bits, least significant
# group first (shifted by 4, 11, 18, ...).
for i, byte in enumerate(bytes[1:]):
333
size += (byte & 0x7f) << ((i * 7) + 4)
334
# Offset within `map` of whatever follows the entry header.
raw_base = len(bytes)
335
if type == 6: # offset delta
336
# A second variable-length integer follows the header.
bytes = take_msb_bytes(map, raw_base)
337
# The last byte of that integer must have its high bit clear.
assert not (bytes[-1] & 0x80)
338
delta_base_offset = bytes[0] & 0x7f
339
# For every further byte: add one, shift left by seven, then add the
# byte's low seven bits.
for byte in bytes[1:]:
340
delta_base_offset += 1
341
delta_base_offset <<= 7
342
delta_base_offset += (byte & 0x7f)
344
uncomp, comp_len = read_zlib(map, raw_base, size)
345
# The decompressed length must equal the size from the entry header.
assert size == len(uncomp)
346
return type, (delta_base_offset, uncomp), comp_len+raw_base
347
elif type == 7: # ref delta
348
# The 20 bytes after the header name the base object.
basename = map[raw_base:raw_base+20]
349
# The compressed data starts right after those 20 bytes.
uncomp, comp_len = read_zlib(map, raw_base+20, size)
350
assert size == len(uncomp)
351
return type, (basename, uncomp), comp_len+raw_base+20
353
# Remaining types: the compressed data directly follows the header.
uncomp, comp_len = read_zlib(map, raw_base, size)
354
assert len(uncomp) == size
355
return type, uncomp, comp_len+raw_base
358
328
class PackData(object):
359
329
"""The data contained in a packfile.
401
371
def _read_header(self):
402
372
f = open(self._filename, 'rb')
404
(version, self._num_objects) = \
406
375
f.seek(self._size-20)
407
(self._stored_checksum,) = read_pack_tail(f)
376
self._stored_checksum = f.read(20)
379
assert header[:4] == "PACK"
380
(version,) = struct.unpack_from(">L", header, 4)
381
assert version in (2, 3), "Version was %d" % version
382
(self._num_objects,) = struct.unpack_from(">L", header, 8)
411
384
def __len__(self):
412
385
"""Returns the number of objects in this pack."""
425
398
f = open(self._filename, 'rb')
426
399
for i in range(len(self)):
427
400
map = simple_mmap(f, offset, self._size-offset)
428
(type, obj, total_size) = unpack_object(map)
401
(type, obj, total_size) = self._unpack_object(map)
429
402
yield offset, type, obj
430
403
offset += total_size
502
475
f = open(self._filename, 'rb')
504
477
map = simple_mmap(f, offset, size-offset)
505
return unpack_object(map)[:2]
478
return self._unpack_object(map)[:2]
482
def _unpack_object(self, map):
# Decode the single pack entry that starts at offset 0 of `map`.
# The lines shown here do not reference `self`.
#
# Returns a 3-tuple (type, payload, consumed):
#   * type 6 (offset delta): payload is (delta_base_offset, uncomp)
#   * type 7 (ref delta):    payload is (basename, uncomp), where basename
#                            is the 20 bytes that follow the entry header
#   * any other type:        payload is uncomp
# `consumed` is the compressed length reported by read_zlib plus the number
# of header bytes that precede the compressed data.
#
# NOTE(review): source lines 497 and 506 are not part of this excerpt, so
# the statements there are not documented here.
483
# Entry header bytes; presumably take_msb_bytes returns the run of bytes up
# to and including the first one whose high bit is clear -- confirm.
bytes = take_msb_bytes(map, 0)
484
# Bits 4-6 of the first byte carry the object type.
type = (bytes[0] >> 4) & 0x07
485
# The low four bits of the first byte are the least significant size bits.
size = bytes[0] & 0x0f
486
# Each following byte contributes seven more size bits, least significant
# group first (shifted by 4, 11, 18, ...).
for i, byte in enumerate(bytes[1:]):
487
size += (byte & 0x7f) << ((i * 7) + 4)
488
# Offset within `map` of whatever follows the entry header.
raw_base = len(bytes)
489
if type == 6: # offset delta
490
# A second variable-length integer follows the header.
bytes = take_msb_bytes(map, raw_base)
491
# The last byte of that integer must have its high bit clear.
assert not (bytes[-1] & 0x80)
492
delta_base_offset = bytes[0] & 0x7f
493
# For every further byte: add one, shift left by seven, then add the
# byte's low seven bits.
for byte in bytes[1:]:
494
delta_base_offset += 1
495
delta_base_offset <<= 7
496
delta_base_offset += (byte & 0x7f)
498
uncomp, comp_len = read_zlib(map, raw_base, size)
499
# The decompressed length must equal the size from the entry header.
assert size == len(uncomp)
500
return type, (delta_base_offset, uncomp), comp_len+raw_base
501
elif type == 7: # ref delta
502
# The 20 bytes after the header name the base object.
basename = map[raw_base:raw_base+20]
503
# The compressed data starts right after those 20 bytes.
uncomp, comp_len = read_zlib(map, raw_base+20, size)
504
assert size == len(uncomp)
505
return type, (basename, uncomp), comp_len+raw_base+20
507
# Remaining types: the compressed data directly follows the header.
uncomp, comp_len = read_zlib(map, raw_base, size)
508
assert len(uncomp) == size
509
return type, uncomp, comp_len+raw_base
510
512
class SHA1Writer(object):
570
572
f = open(filename + ".pack", 'w')
572
574
entries, data_sum = write_pack_data(f, objects, num_objects)
576
578
write_pack_index_v2(filename + ".idx", entries, data_sum)
579
def write_pack_data(f, objects, num_objects, window=10):
581
def write_pack_data(f, objects, num_objects):
580
582
"""Write a new pack file.
582
584
:param f: File object to write the pack data to.
583
585
:param objects: List of objects to write.
584
586
:return: List with (name, offset, crc32 checksum) entries, pack checksum
586
recency = list(objects)
587
# FIXME: Somehow limit delta depth
588
# FIXME: Make thin-pack optional (it's not used when cloning a pack)
589
# Build a list of objects ordered by the magic Linus heuristic
590
# This helps us find good objects to diff against us
592
for obj, path in recency:
593
magic.append( (obj.type, path, 1, -len(obj.as_raw_string()[1]), obj) )
595
# Build a map of objects and their index in magic - so we can find preceding objects
598
for i in range(len(magic)):
599
offs[magic[i][4]] = i
602
589
f = SHA1Writer(f)
603
590
f.write("PACK") # Pack header
604
591
f.write(struct.pack(">L", 2)) # Pack version
605
592
f.write(struct.pack(">L", num_objects)) # Number of objects in pack
606
for o, path in recency:
607
594
sha1 = o.sha().digest()
608
595
crc32 = o.crc32()
609
orig_t, raw = o.as_raw_string()
612
#for i in range(offs[o]-window, window):
613
# if i < 0 or i >= len(offs): continue
615
# if b.type != orig_t: continue
616
# _, base = b.as_raw_string()
617
# delta = create_delta(base, raw)
618
# if len(delta) < len(winner):
620
# t = 6 if magic[i][2] == 1 else 7
621
offset = write_pack_object(f, t, winner)
597
t, o = o.as_raw_string()
598
offset = write_pack_object(f, t, o)
622
599
entries.append((sha1, offset, crc32))
623
600
return entries, f.write_sha()
650
def create_delta(base_buf, target_buf):
651
"""Use python difflib to work out how to transform base_buf to target_buf"""
652
assert isinstance(base_buf, str)
653
assert isinstance(target_buf, str)
656
def encode_size(size):
666
out_buf += encode_size(len(base_buf))
667
out_buf += encode_size(len(target_buf))
668
# write out delta opcodes
669
seq = difflib.SequenceMatcher(a=base_buf, b=target_buf)
670
for opcode, i1, i2, j1, j2 in seq.get_opcodes():
671
# Git patch opcodes don't care about deletes!
672
#if opcode == "replace" or opcode == "delete":
674
if opcode == "equal":
675
# If they are equal, unpacker will use data from base_buf
676
# Write out an opcode that says what range to use
682
scratch += chr(o >> i)
687
scratch += chr(s >> i)
691
if opcode == "replace" or opcode == "insert":
692
# If we are replacing a range or adding one, then we just
693
# output it to the stream (prefixed by its size)
698
out_buf += target_buf[o:o+127]
702
out_buf += target_buf[o:o+s]
706
627
def apply_delta(src_buf, delta):
707
628
"""Based on the similar function in git's patch-delta.c."""
708
629
assert isinstance(src_buf, str), "was %r" % (src_buf,)
862
785
type, uncomp = self.get_raw(sha1)
863
786
return ShaFile.from_raw_string(type, uncomp)
865
def iterobjects(self, get_raw=None):
788
def iterobjects(self):
869
789
for offset, type, obj in self.data.iterobjects():
870
790
assert isinstance(offset, int)
871
791
yield ShaFile.from_raw_string(
872
*resolve_object(offset, type, obj,
874
self.data.get_object_at))
792
*resolve_object(offset, type, obj, self.get_raw,
793
self.data.get_object_at))
877
796
def load_packs(path):