81
84
return sha.hexdigest()
85
"""Convert a hex string to a binary sha string."""
87
for i in range(0, len(hex), 2):
88
ret += chr(int(hex[i:i+2], 16))
93
"""Convert a binary sha string to a hex sha string."""
96
ret += "%02x" % ord(i)
100
87
MAX_MMAP_SIZE = 256 * 1024 * 1024
102
89
def simple_mmap(f, offset, size, access=mmap.ACCESS_READ):
315
def read_pack_header(f):
317
assert header[:4] == "PACK"
318
(version,) = struct.unpack_from(">L", header, 4)
319
assert version in (2, 3), "Version was %d" % version
320
(num_objects,) = struct.unpack_from(">L", header, 8)
321
return (version, num_objects)
324
def read_pack_tail(f):
328
def unpack_object(map):
329
bytes = take_msb_bytes(map, 0)
330
type = (bytes[0] >> 4) & 0x07
331
size = bytes[0] & 0x0f
332
for i, byte in enumerate(bytes[1:]):
333
size += (byte & 0x7f) << ((i * 7) + 4)
334
raw_base = len(bytes)
335
if type == 6: # offset delta
336
bytes = take_msb_bytes(map, raw_base)
337
assert not (bytes[-1] & 0x80)
338
delta_base_offset = bytes[0] & 0x7f
339
for byte in bytes[1:]:
340
delta_base_offset += 1
341
delta_base_offset <<= 7
342
delta_base_offset += (byte & 0x7f)
344
uncomp, comp_len = read_zlib(map, raw_base, size)
345
assert size == len(uncomp)
346
return type, (delta_base_offset, uncomp), comp_len+raw_base
347
elif type == 7: # ref delta
348
basename = map[raw_base:raw_base+20]
349
uncomp, comp_len = read_zlib(map, raw_base+20, size)
350
assert size == len(uncomp)
351
return type, (basename, uncomp), comp_len+raw_base+20
353
uncomp, comp_len = read_zlib(map, raw_base, size)
354
assert len(uncomp) == size
355
return type, uncomp, comp_len+raw_base
328
358
class PackData(object):
329
359
"""The data contained in a packfile.
371
401
def _read_header(self):
372
402
f = open(self._filename, 'rb')
404
(version, self._num_objects) = \
375
406
f.seek(self._size-20)
376
self._stored_checksum = f.read(20)
407
(self._stored_checksum,) = read_pack_tail(f)
379
assert header[:4] == "PACK"
380
(version,) = struct.unpack_from(">L", header, 4)
381
assert version in (2, 3), "Version was %d" % version
382
(self._num_objects,) = struct.unpack_from(">L", header, 8)
384
411
def __len__(self):
385
412
"""Returns the number of objects in this pack."""
398
425
f = open(self._filename, 'rb')
399
426
for i in range(len(self)):
400
427
map = simple_mmap(f, offset, self._size-offset)
401
(type, obj, total_size) = self._unpack_object(map)
428
(type, obj, total_size) = unpack_object(map)
402
429
yield offset, type, obj
403
430
offset += total_size
475
502
f = open(self._filename, 'rb')
477
504
map = simple_mmap(f, offset, size-offset)
478
return self._unpack_object(map)[:2]
505
return unpack_object(map)[:2]
482
def _unpack_object(self, map):
483
bytes = take_msb_bytes(map, 0)
484
type = (bytes[0] >> 4) & 0x07
485
size = bytes[0] & 0x0f
486
for i, byte in enumerate(bytes[1:]):
487
size += (byte & 0x7f) << ((i * 7) + 4)
488
raw_base = len(bytes)
489
if type == 6: # offset delta
490
bytes = take_msb_bytes(map, raw_base)
491
assert not (bytes[-1] & 0x80)
492
delta_base_offset = bytes[0] & 0x7f
493
for byte in bytes[1:]:
494
delta_base_offset += 1
495
delta_base_offset <<= 7
496
delta_base_offset += (byte & 0x7f)
498
uncomp, comp_len = read_zlib(map, raw_base, size)
499
assert size == len(uncomp)
500
return type, (delta_base_offset, uncomp), comp_len+raw_base
501
elif type == 7: # ref delta
502
basename = map[raw_base:raw_base+20]
503
uncomp, comp_len = read_zlib(map, raw_base+20, size)
504
assert size == len(uncomp)
505
return type, (basename, uncomp), comp_len+raw_base+20
507
uncomp, comp_len = read_zlib(map, raw_base, size)
508
assert len(uncomp) == size
509
return type, uncomp, comp_len+raw_base
512
510
class SHA1Writer(object):
572
570
f = open(filename + ".pack", 'w')
574
572
entries, data_sum = write_pack_data(f, objects, num_objects)
578
576
write_pack_index_v2(filename + ".idx", entries, data_sum)
581
def write_pack_data(f, objects, num_objects):
579
def write_pack_data(f, objects, num_objects, window=10):
582
580
"""Write a new pack file.
584
582
:param filename: The filename of the new pack file.
585
583
:param objects: List of objects to write.
586
584
:return: List with (name, offset, crc32 checksum) entries, pack checksum
586
recency = list(objects)
587
# FIXME: Somehow limit delta depth
588
# FIXME: Make thin-pack optional (its not used when cloning a pack)
589
# Build a list of objects ordered by the magic Linus heuristic
590
# This helps us find good objects to diff against us
593
magic.append( (o._num_type, "filename", 1, -len(o.as_raw_string()[1]), o) )
595
# Build a map of objects and their index in magic - so we can find preceeding objects
598
for i in range(len(magic)):
599
offs[magic[i][4]] = i
589
602
f = SHA1Writer(f)
590
603
f.write("PACK") # Pack header
591
604
f.write(struct.pack(">L", 2)) # Pack version
592
605
f.write(struct.pack(">L", num_objects)) # Number of objects in pack
594
607
sha1 = o.sha().digest()
595
608
crc32 = o.crc32()
597
t, o = o.as_raw_string()
598
offset = write_pack_object(f, t, o)
609
orig_t, raw = o.as_raw_string()
612
#for i in range(offs[o]-window, window):
613
# if i < 0 or i >= len(offs): continue
615
# if b._num_type != orig_t: continue
616
# _, base = b.as_raw_string()
617
# delta = create_delta(base, raw)
618
# if len(delta) < len(winner):
620
# t = 6 if magic[i][2] == 1 else 7
621
offset = write_pack_object(f, t, winner)
599
622
entries.append((sha1, offset, crc32))
600
623
return entries, f.write_sha()
650
def create_delta(base_buf, target_buf):
651
"""Use python difflib to work out how to transform base_buf to target_buf"""
652
assert isinstance(base_buf, str)
653
assert isinstance(target_buf, str)
656
def encode_size(size):
666
out_buf += encode_size(len(base_buf))
667
out_buf += encode_size(len(target_buf))
668
# write out delta opcodes
669
seq = difflib.SequenceMatcher(a=base_buf, b=target_buf)
670
for opcode, i1, i2, j1, j2 in seq.get_opcodes():
671
# Git patch opcodes don't care about deletes!
672
#if opcode == "replace" or opcode == "delete":
674
if opcode == "equal":
675
# If they are equal, unpacker will use data from base_buf
676
# Write out an opcode that says what range to use
682
scratch += chr(o >> i)
687
scratch += chr(s >> i)
691
if opcode == "replace" or opcode == "insert":
692
# If we are replacing a range or adding one, then we just
693
# output it to the stream (prefixed by its size)
698
out_buf += target_buf[o:o+127]
702
out_buf += target_buf[o:o+s]
627
706
def apply_delta(src_buf, delta):
628
707
"""Based on the similar function in git's patch-delta.c."""
629
708
assert isinstance(src_buf, str), "was %r" % (src_buf,)
645
724
return size, delta
646
725
src_size, delta = get_delta_header_size(delta)
647
726
dest_size, delta = get_delta_header_size(delta)
648
assert src_size == len(src_buf)
727
assert src_size == len(src_buf), "%d vs %d" % (src_size, len(src_buf))
650
729
cmd, delta = pop(delta)