/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to diff-delta.c

  • Committer: John Arbash Meinel
  • Date: 2009-03-03 16:02:22 UTC
  • mto: (0.17.31 trunk)
  • mto: This revision was merged to the branch mainline in revision 4280.
  • Revision ID: john@arbash-meinel.com-20090303160222-4bkou2s65s60h75a
Start moving the information about source buffers into the actual index_entry.

This leads the way for combining indexes for multiple sources together.

Show diffs side-by-side

added added

removed removed

Lines of Context:
112
112
        0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a
113
113
};
114
114
 
 
115
struct source_info {
 
116
        const void *src_buf; /* Pointer to the beginning of source data */
 
117
        unsigned long src_size; /* Total length of source data */
 
118
        unsigned long agg_src_offset; /* Start of source data as part of the
 
119
                                                                         aggregate source */
 
120
};
 
121
 
115
122
struct index_entry {
116
123
        const unsigned char *ptr;
 
124
        const struct source_info *src;
117
125
        unsigned int val;
118
126
};
119
127
 
124
132
 
125
133
struct delta_index {
126
134
        unsigned long memsize; /* Total bytes pointed to by this index */
127
 
        const void *src_buf; /* Pointer to the beginning of source data */
128
 
        unsigned long src_size; /* Total length of source data */
129
 
        unsigned long agg_src_offset; /* Start of source data as part of the
130
 
                                                                         aggregate source */
 
135
        struct source_info src; /* Information about the referenced source */
131
136
        unsigned int hash_mask; /* val & hash_mask gives the hash index for a given
132
137
                                                           entry */
 
138
        unsigned int num_entries; /* The total number of entries in this index */
133
139
        struct index_entry *hash[];
134
140
};
135
141
 
216
222
        index = mem;
217
223
        index->memsize = memsize;
218
224
        index->hash_mask = hsize - 1;
 
225
        index->num_entries = entries;
219
226
 
220
227
        mem = index->hash;
221
228
        packed_hash = mem;
228
235
                 * into consecutive array entries.
229
236
                 */
230
237
                packed_hash[i] = packed_entry;
231
 
                for (entry = hash[i]; entry; entry = entry->next)
232
 
                        *packed_entry++ = entry->entry;
 
238
                for (entry = hash[i]; entry; entry = entry->next, ++packed_entry) {
 
239
                        *packed_entry = entry->entry;
 
240
                        packed_entry->src = &index->src;
 
241
                }
233
242
        }
234
243
 
235
244
        /* Sentinel value to indicate the length of the last hash bucket */
311
320
        if (!index) {
312
321
                return NULL;
313
322
        }
314
 
        index->src_buf = buf;
315
 
        index->src_size = bufsize;
316
 
        index->agg_src_offset = agg_src_offset;
 
323
        index->src.src_buf = buf;
 
324
        index->src.src_size = bufsize;
 
325
        index->src.agg_src_offset = agg_src_offset;
317
326
        return index;
318
327
}
319
328
 
343
352
                         unsigned long *delta_size, unsigned long max_size)
344
353
{
345
354
        unsigned int i, j, outpos, outsize, moff, msize, val;
346
 
        const struct delta_index *index, *mindex;
 
355
        const struct delta_index *index;
 
356
        const struct source_info *msource;
347
357
        int inscnt;
348
358
        const unsigned char *ref_data, *ref_top, *data, *top;
349
359
        unsigned char *out;
366
376
        index = indexes[0];
367
377
        for (j = 0; j < num_indexes; ++j) {
368
378
                index = indexes[j];
369
 
                i += index->src_size;
 
379
                i += index->src.src_size;
370
380
        }
371
 
        assert(i <= index->src_size + index->agg_src_offset);
372
 
        i = index->src_size + index->agg_src_offset;
 
381
        assert(i <= index->src.src_size + index->src.agg_src_offset);
 
382
        i = index->src.src_size + index->src.agg_src_offset;
373
383
        while (i >= 0x80) {
374
384
                out[outpos++] = i | 0x80;
375
385
                i >>= 7;
404
414
 
405
415
        moff = 0;
406
416
        msize = 0;
407
 
        mindex = NULL;
 
417
        msource = NULL;
408
418
        while (data < top) {
409
419
                if (msize < 4096) {
410
420
                        /* we don't have a 'worthy enough' match yet, so let's look for
417
427
                        for (j = 0; j < num_indexes; ++j) {
418
428
                                index = indexes[j];
419
429
                                i = val & index->hash_mask;
420
 
                                ref_top = index->src_buf + index->src_size;
421
 
                                ref_data = index->src_buf;
422
430
                                /* TODO: When using multiple indexes like this, the hash tables
423
431
                                 *               mapping val => index_entry become less efficient.
424
432
                                 *               You end up getting a lot more collisions in the hash,
426
434
                                 */
427
435
                                for (entry = index->hash[i]; entry < index->hash[i+1];
428
436
                                         entry++) {
429
 
                                        const unsigned char *ref = entry->ptr;
430
 
                                        const unsigned char *src = data;
431
 
                                        unsigned int ref_size = ref_top - ref;
 
437
                                        const unsigned char *ref;
 
438
                                        const unsigned char *src;
 
439
                                        unsigned int ref_size;
432
440
                                        if (entry->val != val)
433
441
                                                continue;
 
442
                                        ref = entry->ptr;
 
443
                                        src = data;
 
444
                                        ref_data = entry->src->src_buf;
 
445
                                        ref_top = ref_data + entry->src->src_size;
 
446
                                        ref_size = ref_top - ref;
434
447
                                        /* ref_size is the longest possible match that we could make
435
448
                                         * here. If ref_size <= msize, then we know that we cannot
436
449
                                         * match more bytes with this location that we have already
446
459
                                        if (msize < ref - entry->ptr) {
447
460
                                                /* this is our best match so far */
448
461
                                                msize = ref - entry->ptr;
449
 
                                                mindex = index;
 
462
                                                msource = entry->src;
450
463
                                                moff = entry->ptr - ref_data;
451
464
                                                if (msize >= 4096) /* good enough */
452
465
                                                        break;
477
490
                        unsigned char *op;
478
491
 
479
492
                        if (inscnt) {
480
 
                                ref_data = mindex->src_buf;
 
493
                                ref_data = msource->src_buf;
481
494
                                while (moff && ref_data[moff-1] == data[-1]) {
482
495
                                        /* we can match one byte back */
483
496
                                        msize++;
504
517
                        /* moff is the offset in the local structure, for encoding, we need
505
518
                         * to push it into the global offset
506
519
                         */
507
 
                        moff += mindex->agg_src_offset;
 
520
                        moff += msource->agg_src_offset;
508
521
                        if (moff & 0x000000ff)
509
522
                                out[outpos++] = moff >> 0,  i |= 0x01;
510
523
                        if (moff & 0x0000ff00)
516
529
                        /* Put it back into local coordinates, in case we have multiple
517
530
                         * copies in a row.
518
531
                         */
519
 
                        moff -= mindex->agg_src_offset;
 
532
                        moff -= msource->agg_src_offset;
520
533
 
521
534
                        if (msize & 0x00ff)
522
535
                                out[outpos++] = msize >> 0, i |= 0x10;