4
4
* This code was greatly inspired by parts of LibXDiff from Davide Libenzi
5
5
* http://www.xmailserver.org/xdiff-lib.html
7
* Rewritten for GIT by Nicolas Pitre <nico@cam.org>, (C) 2005-2007
7
* Rewritten for GIT by Nicolas Pitre <nico@fluxnic.net>, (C) 2005-2007
8
8
* Adapted for Bazaar by John Arbash Meinel <john@arbash-meinel.com> (C) 2009
10
10
* This program is free software; you can redistribute it and/or modify
220
220
unsigned int i, j, hmask, memsize, fit_in_old, copied_count;
221
221
struct unpacked_index_entry *entry;
222
222
struct delta_index *index;
223
struct index_entry *packed_entry, **packed_hash, *old_entry, *copy_from;
223
struct index_entry *packed_entry, **packed_hash, *old_entry;
224
224
struct index_entry null_entry = {0};
280
280
if (fit_in_old) {
281
281
// fprintf(stderr, "Fit all %d entries into old index\n",
282
282
// copied_count);
283
/* No need to allocate a new buffer */
284
* No need to allocate a new buffer, but return old_index ptr so
285
* callers can distinguish this from an OOM failure.
286
289
// fprintf(stderr, "Fit only %d entries into old index,"
287
290
// " reallocating\n", copied_count);
331
334
* old_index->hash_mask? Would it make any real difference?
333
336
j = i & old_index->hash_mask;
334
copy_from = old_index->hash[j];
335
337
for (old_entry = old_index->hash[j];
336
338
old_entry < old_index->hash[j + 1] && old_entry->ptr != NULL;
374
376
create_delta_index(const struct source_info *src,
375
struct delta_index *old)
377
struct delta_index *old,
378
struct delta_index **fresh,
379
int max_bytes_to_index)
377
381
unsigned int i, hsize, hmask, num_entries, prev_val, *hash_count;
378
unsigned int total_num_entries;
382
unsigned int total_num_entries, stride, max_entries;
379
383
const unsigned char *data, *buffer;
380
384
struct delta_index *index;
381
385
struct unpacked_index_entry *entry, **hash;
383
387
unsigned long memsize;
385
389
if (!src->buf || !src->size)
390
return DELTA_SOURCE_EMPTY;
387
391
buffer = src->buf;
389
393
/* Determine index hash size. Note that indexing skips the
390
first byte to allow for optimizing the Rabin's polynomial
391
initialization in create_delta(). */
394
first byte so we subtract 1 to get the edge cases right.
396
stride = RABIN_WINDOW;
392
397
num_entries = (src->size - 1) / RABIN_WINDOW;
398
if (max_bytes_to_index > 0) {
399
max_entries = (unsigned int) (max_bytes_to_index / RABIN_WINDOW);
400
if (num_entries > max_entries) {
401
/* Limit the max number of matching entries. This reduces the 'best'
402
* possible match, but means we don't consume all of ram.
404
num_entries = max_entries;
405
stride = (src->size - 1) / num_entries;
394
409
total_num_entries = num_entries + old->num_entries;
419
434
hash_count = calloc(hsize, sizeof(*hash_count));
420
435
if (!hash_count) {
437
return DELTA_OUT_OF_MEMORY;
425
440
/* then populate the index for the new data */
427
for (data = buffer + num_entries * RABIN_WINDOW - RABIN_WINDOW;
442
for (data = buffer + num_entries * stride - RABIN_WINDOW;
429
data -= RABIN_WINDOW) {
430
445
unsigned int val = 0;
431
446
for (i = 1; i <= RABIN_WINDOW; i++)
432
447
val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
450
465
total_num_entries = limit_hash_buckets(hash, hash_count, hsize,
451
466
total_num_entries);
452
467
free(hash_count);
456
468
index = pack_delta_index(hash, hsize, total_num_entries, old);
470
/* pack_delta_index only returns NULL on malloc failure */
472
return DELTA_OUT_OF_MEMORY;
461
474
index->last_src = src;
465
479
/* Take some entries, and put them into a custom hash.
473
487
unsigned int hsize)
475
489
unsigned int hash_offset, hmask, memsize;
476
struct index_entry *entry, *last_entry;
490
struct index_entry *entry;
477
491
struct index_entry_linked_list *out_entry, **hash;
493
507
/* We know that entries are in the order we want in the output, but they
494
508
* aren't "grouped" by hash bucket yet.
496
last_entry = entries + num_entries;
497
510
for (entry = entries + num_entries - 1; entry >= entries; --entry) {
498
511
hash_offset = entry->val & hmask;
499
512
out_entry->p_entry = entry;
518
531
unsigned int i, j, hsize, hmask, total_num_entries;
519
532
struct delta_index *index;
520
533
struct index_entry *entry, *packed_entry, **packed_hash;
521
struct index_entry *last_entry, null_entry = {0};
534
struct index_entry null_entry = {0};
523
536
unsigned long memsize;
524
537
struct index_entry_linked_list *unpacked_entry, **mini_hash;
683
695
create_delta_index_from_delta(const struct source_info *src,
684
struct delta_index *old_index)
696
struct delta_index *old_index,
697
struct delta_index **fresh)
686
699
unsigned int i, num_entries, max_num_entries, prev_val, num_inserted;
687
700
unsigned int hash_offset;
690
703
struct delta_index *new_index;
691
704
struct index_entry *entry, *entries;
707
return DELTA_INDEX_NEEDED;
693
708
if (!src->buf || !src->size)
709
return DELTA_SOURCE_EMPTY;
695
710
buffer = src->buf;
696
711
top = buffer + src->size;
705
720
max_num_entries = (src->size - 1) / RABIN_WINDOW;
722
if (!max_num_entries) {
707
727
/* allocate an array to hold whatever entries we find */
708
728
entries = malloc(sizeof(*entry) * max_num_entries);
709
729
if (!entries) /* malloc failure */
730
return DELTA_OUT_OF_MEMORY;
712
732
/* then populate the index for the new data */
776
796
if (data != top) {
777
/* Something was wrong with this delta */
797
/* The source_info data passed was corrupted or otherwise invalid */
799
return DELTA_SOURCE_BAD;
781
801
if (num_entries == 0) {
782
802
/** Nothing to index **/
786
assert(old_index != NULL);
787
807
old_index->last_src = src;
788
808
/* See if we can fill in these values into the holes in the array */
841
861
new_index = create_index_from_old_and_new_entries(old_index,
842
862
entry, num_entries);
864
new_index = old_index;
845
865
// fprintf(stderr, "inserted %d without resizing\n", num_inserted);
868
/* create_index_from_old_and_new_entries returns NULL on malloc failure */
870
return DELTA_OUT_OF_MEMORY;
851
875
void free_delta_index(struct delta_index *index)
868
893
#define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7)
871
896
create_delta(const struct delta_index *index,
872
897
const void *trg_buf, unsigned long trg_size,
873
unsigned long *delta_size, unsigned long max_size)
898
unsigned long *delta_size, unsigned long max_size,
875
unsigned int i, outpos, outsize, moff, msize, val;
901
unsigned int i, outpos, outsize, moff, val;
876
903
const struct source_info *msource;
878
905
const unsigned char *ref_data, *ref_top, *data, *top;
879
906
unsigned char *out;
880
unsigned long source_size;
882
908
if (!trg_buf || !trg_size)
909
return DELTA_BUFFER_EMPTY;
884
910
if (index == NULL)
911
return DELTA_INDEX_NEEDED;
956
980
* match more bytes with this location that we have already
959
if (ref_size > top - src)
983
if (ref_size > (top - src))
960
984
ref_size = top - src;
961
985
if (ref_size <= msize)
963
987
/* See how many bytes actually match at this location. */
964
988
while (ref_size-- && *src++ == *ref)
966
if (msize < ref - entry->ptr) {
990
if (msize < (ref - entry->ptr)) {
967
991
/* this is our best match so far */
968
992
msize = ref - entry->ptr;
969
993
msource = entry->src;
1026
1050
assert(moff < msource->size);
1027
1051
moff += msource->agg_offset;
1028
assert(moff + msize <= source_size);
1053
<= index->last_src->size + index->last_src->agg_offset);
1029
1054
if (moff & 0x000000ff)
1030
1055
out[outpos++] = moff >> 0, i |= 0x01;
1031
1056
if (moff & 0x0000ff00)
1080
1105
if (max_size && outpos > max_size) {
1107
return DELTA_SIZE_TOO_BIG;
1085
1110
*delta_size = outpos;
1117
get_entry_summary(const struct delta_index *index, int pos,
1118
unsigned int *text_offset, unsigned int *hash_val)
1121
const struct index_entry *entry;
1122
const struct index_entry *start_of_entries;
1123
unsigned int offset;
1124
if (pos < 0 || text_offset == NULL || hash_val == NULL
1129
hsize = index->hash_mask + 1;
1130
start_of_entries = (struct index_entry *)(((struct index_entry **)index->hash) + (hsize + 1));
1131
entry = start_of_entries + pos;
1132
if (entry > index->last_entry) {
1135
if (entry->ptr == NULL) {
1139
offset = entry->src->agg_offset;
1140
offset += (entry->ptr - ((unsigned char *)entry->src->buf));
1141
*text_offset = offset;
1142
*hash_val = entry->val;
1149
get_hash_offset(const struct delta_index *index, int pos,
1150
unsigned int *entry_offset)
1153
const struct index_entry *entry;
1154
const struct index_entry *start_of_entries;
1155
if (pos < 0 || index == NULL || entry_offset == NULL)
1159
hsize = index->hash_mask + 1;
1160
start_of_entries = (struct index_entry *)(((struct index_entry **)index->hash) + (hsize + 1));
1164
entry = index->hash[pos];
1165
if (entry == NULL) {
1168
*entry_offset = (entry - start_of_entries);
1175
rabin_hash(const unsigned char *data)
1178
unsigned int val = 0;
1179
for (i = 0; i < RABIN_WINDOW; i++)
1180
val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
1089
1184
/* vim: et ts=4 sw=4 sts=4