/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
1
/*
2
 * diff-delta.c: generate a delta between two buffers
3
 *
4
 * This code was greatly inspired by parts of LibXDiff from Davide Libenzi
5
 * http://www.xmailserver.org/xdiff-lib.html
6
 *
5698.2.1 by Martin
Update Nicolas Pitre's email address
7
 * Rewritten for GIT by Nicolas Pitre <nico@fluxnic.net>, (C) 2005-2007
0.17.31 by John Arbash Meinel
Bring in the 'rabin' experiment.
8
 * Adapted for Bazaar by John Arbash Meinel <john@arbash-meinel.com> (C) 2009
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
9
 *
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
10
 * This program is free software; you can redistribute it and/or modify
11
 * it under the terms of the GNU General Public License as published by
12
 * the Free Software Foundation; either version 2 of the License, or
13
 * (at your option) any later version.
14
 *
15
 * NB: The version in GIT is 'version 2 of the Licence only', however Nicolas
16
 * has granted permission for use under 'version 2 or later' in private email
17
 * to Robert Collins and Karl Fogel on the 6th April 2009.
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
18
 */
19
3735.33.4 by John Arbash Meinel
The new layout is working.
20
#include <stdio.h>
21
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
22
#include "delta.h"
0.17.31 by John Arbash Meinel
Bring in the 'rabin' experiment.
23
#include <stdlib.h>
24
#include <string.h>
0.23.5 by John Arbash Meinel
Minor changes to get diff-delta.c and patch-delta.c to compile.
25
#include <assert.h>
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
26
27
/* maximum hash entry list for the same hash bucket */
28
#define HASH_LIMIT 64
29
30
#define RABIN_SHIFT 23
31
#define RABIN_WINDOW 16
32
3735.33.13 by John Arbash Meinel
Tweak the number of blank spaces up just a tad.
33
/* The hash map is sized to put 4 entries per bucket, this gives us ~even room
34
 * for more data. Tweaking this number above 4 doesn't seem to help much,
35
 * anyway.
3735.33.6 by John Arbash Meinel
Increasing EXTRA_NULLS to 2 from 1 ups the hit rate
36
 */
3735.33.13 by John Arbash Meinel
Tweak the number of blank spaces up just a tad.
37
#define EXTRA_NULLS 4
3735.33.3 by John Arbash Meinel
include_entries_from_hash wasn't properly skipping NULL records.
38
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
39
static const unsigned int T[256] = {
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
40
    0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344,
41
    0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259,
42
    0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85,
43
    0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2,
44
    0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a,
45
    0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db,
46
    0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753,
47
    0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964,
48
    0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8,
49
    0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5,
50
    0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81,
51
    0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6,
52
    0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e,
53
    0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77,
54
    0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff,
55
    0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8,
56
    0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc,
57
    0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1,
58
    0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d,
59
    0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a,
60
    0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2,
61
    0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02,
62
    0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a,
63
    0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd,
64
    0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61,
65
    0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c,
66
    0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08,
67
    0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f,
68
    0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7,
69
    0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe,
70
    0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76,
71
    0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141,
72
    0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65,
73
    0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78,
74
    0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4,
75
    0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93,
76
    0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b,
77
    0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa,
78
    0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872,
79
    0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645,
80
    0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99,
81
    0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84,
82
    0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
83
};
84
85
static const unsigned int U[256] = {
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
86
    0x00000000, 0x7eb5200d, 0x5633f4cb, 0x2886d4c6, 0x073e5d47, 0x798b7d4a,
87
    0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48,
88
    0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511,
89
    0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d,
90
    0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8,
91
    0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe,
92
    0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb,
93
    0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937,
94
    0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e,
95
    0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c,
96
    0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d,
97
    0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1,
98
    0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4,
99
    0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa,
100
    0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef,
101
    0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263,
102
    0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302,
103
    0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000,
104
    0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59,
105
    0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5,
106
    0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90,
107
    0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7,
108
    0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2,
109
    0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e,
110
    0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467,
111
    0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765,
112
    0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604,
113
    0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88,
114
    0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd,
115
    0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3,
116
    0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996,
117
    0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a,
118
    0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b,
119
    0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609,
120
    0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50,
121
    0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc,
122
    0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99,
123
    0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf,
124
    0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa,
125
    0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176,
126
    0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f,
127
    0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d,
128
    0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
129
};
130
131
struct index_entry {
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
132
    const unsigned char *ptr;
133
    const struct source_info *src;
134
    unsigned int val;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
135
};
136
3735.33.9 by John Arbash Meinel
When expanding an index put the entries into a hash.
137
struct index_entry_linked_list {
138
    struct index_entry *p_entry;
139
    struct index_entry_linked_list *next;
140
};
141
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
142
struct unpacked_index_entry {
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
143
    struct index_entry entry;
144
    struct unpacked_index_entry *next;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
145
};
146
147
struct delta_index {
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
148
    unsigned long memsize; /* Total bytes pointed to by this index */
149
    const struct source_info *last_src; /* Information about the referenced source */
150
    unsigned int hash_mask; /* val & hash_mask gives the hash index for a given
151
                               entry */
152
    unsigned int num_entries; /* The total number of entries in this index */
153
    struct index_entry *last_entry; /* Pointer to the last valid entry */
154
    struct index_entry *hash[];
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
155
};
156
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
157
static unsigned int
158
limit_hash_buckets(struct unpacked_index_entry **hash,
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
159
                   unsigned int *hash_count, unsigned int hsize,
160
                   unsigned int entries)
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
161
{
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
162
    struct unpacked_index_entry *entry;
163
    unsigned int i;
164
    /*
165
     * Determine a limit on the number of entries in the same hash
166
     * bucket.  This guards us against pathological data sets causing
167
     * really bad hash distribution with most entries in the same hash
168
     * bucket that would bring us to O(m*n) computing costs (m and n
169
     * corresponding to reference and target buffer sizes).
170
     *
171
     * Make sure none of the hash buckets has more entries than
172
     * we're willing to test.  Otherwise we cull the entry list
173
     * uniformly to still preserve a good repartition across
174
     * the reference buffer.
175
     */
176
    for (i = 0; i < hsize; i++) {
177
        int acc;
178
179
        if (hash_count[i] <= HASH_LIMIT)
180
            continue;
181
182
        /* We leave exactly HASH_LIMIT entries in the bucket */
183
        entries -= hash_count[i] - HASH_LIMIT;
184
185
        entry = hash[i];
186
        acc = 0;
187
188
        /*
189
         * Assume that this loop is gone through exactly
190
         * HASH_LIMIT times and is entered and left with
191
         * acc==0.  So the first statement in the loop
192
         * contributes (hash_count[i]-HASH_LIMIT)*HASH_LIMIT
193
         * to the accumulator, and the inner loop consequently
194
         * is run (hash_count[i]-HASH_LIMIT) times, removing
195
         * one element from the list each time.  Since acc
196
         * balances out to 0 at the final run, the inner loop
197
         * body can't be left with entry==NULL.  So we indeed
198
         * encounter entry==NULL in the outer loop only.
199
         */
200
        do {
201
            acc += hash_count[i] - HASH_LIMIT;
202
            if (acc > 0) {
203
                struct unpacked_index_entry *keep = entry;
204
                do {
205
                    entry = entry->next;
206
                    acc -= HASH_LIMIT;
207
                } while (acc > 0);
208
                keep->next = entry->next;
209
            }
210
            entry = entry->next;
211
        } while (entry);
212
    }
213
    return entries;
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
214
}
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
215
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
216
static struct delta_index *
217
pack_delta_index(struct unpacked_index_entry **hash, unsigned int hsize,
3735.33.12 by John Arbash Meinel
Now we are able to weave 'add_source' into the existing index.
218
                 unsigned int num_entries, struct delta_index *old_index)
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
219
{
3735.33.14 by John Arbash Meinel
Simplify the code a bit. We don't repack as often, so go with a
220
    unsigned int i, j, hmask, memsize, fit_in_old, copied_count;
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
221
    struct unpacked_index_entry *entry;
222
    struct delta_index *index;
3735.33.12 by John Arbash Meinel
Now we are able to weave 'add_source' into the existing index.
223
    struct index_entry *packed_entry, **packed_hash, *old_entry, *copy_from;
3735.33.1 by John Arbash Meinel
(broken, in progress), change the data structures to allow for inserting small deltas.
224
    struct index_entry null_entry = {0};
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
225
    void *mem;
3735.33.12 by John Arbash Meinel
Now we are able to weave 'add_source' into the existing index.
226
227
    hmask = hsize - 1;
228
3735.33.15 by John Arbash Meinel
Remove the noisy debugging code. (down to 23.1s)
229
    // if (old_index) {
230
    //     fprintf(stderr, "Packing %d entries into %d for total of %d entries"
231
    //                     " %x => %x\n",
232
    //                     num_entries - old_index->num_entries,
233
    //                     old_index->num_entries, num_entries,
234
    //                     old_index->hash_mask, hmask);
235
    // } else {
236
    //     fprintf(stderr, "Packing %d entries into a new index\n",
237
    //                     num_entries);
238
    // }
3735.33.12 by John Arbash Meinel
Now we are able to weave 'add_source' into the existing index.
239
    /* First, see if we can squeeze the new items into the existing structure.
240
     */
241
    fit_in_old = 0;
242
    copied_count = 0;
243
    if (old_index && old_index->hash_mask == hmask) {
244
        fit_in_old = 1;
245
        for (i = 0; i < hsize; ++i) {
246
            packed_entry = NULL;
247
            for (entry = hash[i]; entry; entry = entry->next) {
248
                if (packed_entry == NULL) {
249
                    /* Find the last open spot */
250
                    packed_entry = old_index->hash[i + 1];
251
                    --packed_entry;
252
                    while (packed_entry >= old_index->hash[i]
253
                           && packed_entry->ptr == NULL) {
254
                        --packed_entry;
255
                    }
256
                    ++packed_entry;
257
                }
258
                if (packed_entry >= old_index->hash[i+1]
259
                    || packed_entry->ptr != NULL) {
260
                    /* There are no free spots here :( */
261
                    fit_in_old = 0;
262
                    break;
263
                }
264
                /* We found an empty spot to put this entry
265
                 * Copy it over, and remove it from the linked list, just in
266
                 * case we end up running out of room later.
267
                 */
268
                *packed_entry++ = entry->entry;
269
                assert(entry == hash[i]);
270
                hash[i] = entry->next;
271
                copied_count += 1;
272
                old_index->num_entries++;
273
            }
274
            if (!fit_in_old) {
275
                break;
276
            }
277
        }
278
    }
279
    if (old_index) {
280
        if (fit_in_old) {
3735.33.15 by John Arbash Meinel
Remove the noisy debugging code. (down to 23.1s)
281
            // fprintf(stderr, "Fit all %d entries into old index\n",
282
            //                 copied_count);
5698.2.2 by Martin
Change create_delta_index signature so callers can treat NULL returns as MemoryError
283
            /*
284
             * No need to allocate a new buffer, but return old_index ptr so
285
             * callers can distinguish this from an OOM failure.
286
             */
287
            return old_index;
3735.33.12 by John Arbash Meinel
Now we are able to weave 'add_source' into the existing index.
288
        } else {
3735.33.15 by John Arbash Meinel
Remove the noisy debugging code. (down to 23.1s)
289
            // fprintf(stderr, "Fit only %d entries into old index,"
290
            //                 " reallocating\n", copied_count);
3735.33.12 by John Arbash Meinel
Now we are able to weave 'add_source' into the existing index.
291
        }
292
    }
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
293
    /*
294
     * Now create the packed index in array form
295
     * rather than linked lists.
3735.33.1 by John Arbash Meinel
(broken, in progress), change the data structures to allow for inserting small deltas.
296
     * Leave a 2-entry gap for inserting more entries between the groups
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
297
     */
298
    memsize = sizeof(*index)
299
        + sizeof(*packed_hash) * (hsize+1)
3735.33.3 by John Arbash Meinel
include_entries_from_hash wasn't properly skipping NULL records.
300
        + sizeof(*packed_entry) * (num_entries + hsize * EXTRA_NULLS);
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
301
    mem = malloc(memsize);
302
    if (!mem) {
303
        return NULL;
304
    }
305
306
    index = mem;
307
    index->memsize = memsize;
3735.33.11 by John Arbash Meinel
Shave 5s->3.3s in add_source by copying old entries across directly.
308
    index->hash_mask = hmask;
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
309
    index->num_entries = num_entries;
3735.33.11 by John Arbash Meinel
Shave 5s->3.3s in add_source by copying old entries across directly.
310
    if (old_index) {
3735.33.16 by John Arbash Meinel
Handle when our current packing is sub-optimal.
311
        if (hmask < old_index->hash_mask) {
312
            fprintf(stderr, "hash mask was shrunk %x => %x\n",
313
                            old_index->hash_mask, hmask);
314
        }
3735.33.11 by John Arbash Meinel
Shave 5s->3.3s in add_source by copying old entries across directly.
315
        assert(hmask >= old_index->hash_mask);
316
    }
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
317
318
    mem = index->hash;
319
    packed_hash = mem;
320
    mem = packed_hash + (hsize+1);
321
    packed_entry = mem;
322
323
    for (i = 0; i < hsize; i++) {
324
        /*
325
         * Coalesce all entries belonging to one linked list
326
         * into consecutive array entries.
327
         */
328
        packed_hash[i] = packed_entry;
3735.33.11 by John Arbash Meinel
Shave 5s->3.3s in add_source by copying old entries across directly.
329
        /* Old comes earlier as a source, so it always comes first in a given
330
         * hash bucket.
331
         */
332
        if (old_index) {
333
            /* Could we optimize this to use memcpy when hmask ==
334
             * old_index->hash_mask? Would it make any real difference?
335
             */
336
            j = i & old_index->hash_mask;
3735.33.12 by John Arbash Meinel
Now we are able to weave 'add_source' into the existing index.
337
            copy_from = old_index->hash[j];
3735.33.11 by John Arbash Meinel
Shave 5s->3.3s in add_source by copying old entries across directly.
338
            for (old_entry = old_index->hash[j];
339
                 old_entry < old_index->hash[j + 1] && old_entry->ptr != NULL;
340
                 old_entry++) {
341
                if ((old_entry->val & hmask) == i) {
3735.33.14 by John Arbash Meinel
Simplify the code a bit. We don't repack as often, so go with a
342
                    *packed_entry++ = *old_entry;
3735.33.11 by John Arbash Meinel
Shave 5s->3.3s in add_source by copying old entries across directly.
343
                }
344
            }
345
        }
3735.33.1 by John Arbash Meinel
(broken, in progress), change the data structures to allow for inserting small deltas.
346
        for (entry = hash[i]; entry; entry = entry->next) {
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
347
            *packed_entry++ = entry->entry;
3735.33.1 by John Arbash Meinel
(broken, in progress), change the data structures to allow for inserting small deltas.
348
        }
3735.33.14 by John Arbash Meinel
Simplify the code a bit. We don't repack as often, so go with a
349
        /* TODO: At this point packed_entry - packed_hash[i] is the number of
350
         *       records that we have inserted into this hash bucket.
351
         *       We should *really* consider doing some limiting along the
352
         *       lines of limit_hash_buckets() to avoid pathological behavior.
353
         */
3735.33.3 by John Arbash Meinel
include_entries_from_hash wasn't properly skipping NULL records.
354
        /* Now add extra 'NULL' entries that we can use for future expansion. */
355
        for (j = 0; j < EXTRA_NULLS; ++j ) {
356
            *packed_entry++ = null_entry;
357
        }
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
358
    }
359
360
    /* Sentinel value to indicate the length of the last hash bucket */
361
    packed_hash[hsize] = packed_entry;
362
3735.33.4 by John Arbash Meinel
The new layout is working.
363
    if (packed_entry - (struct index_entry *)mem
364
        != num_entries + hsize*EXTRA_NULLS) {
3735.33.5 by John Arbash Meinel
Restoring the debugging for now.
365
        fprintf(stderr, "We expected %d entries, but created %d\n",
366
                num_entries + hsize*EXTRA_NULLS,
367
                (int)(packed_entry - (struct index_entry*)mem));
3735.33.4 by John Arbash Meinel
The new layout is working.
368
    }
3735.33.3 by John Arbash Meinel
include_entries_from_hash wasn't properly skipping NULL records.
369
    assert(packed_entry - (struct index_entry *)mem
370
            == num_entries + hsize*EXTRA_NULLS);
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
371
    index->last_entry = (packed_entry - 1);
372
    return index;
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
373
}
374
3735.33.4 by John Arbash Meinel
The new layout is working.
375
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
376
delta_result
3735.33.9 by John Arbash Meinel
When expanding an index put the entries into a hash.
377
create_delta_index(const struct source_info *src,
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
378
                   struct delta_index *old,
5755.2.3 by John Arbash Meinel
Add a max_entries_per_source to DeltaIndex
379
                   struct delta_index **fresh,
380
                   int max_entries)
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
381
{
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
382
    unsigned int i, hsize, hmask, num_entries, prev_val, *hash_count;
5755.2.1 by John Arbash Meinel
Set a maximum number of delta entries for a fulltext.
383
    unsigned int total_num_entries, stride;
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
384
    const unsigned char *data, *buffer;
385
    struct delta_index *index;
386
    struct unpacked_index_entry *entry, **hash;
387
    void *mem;
388
    unsigned long memsize;
389
390
    if (!src->buf || !src->size)
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
391
        return DELTA_SOURCE_EMPTY;
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
392
    buffer = src->buf;
393
394
    /* Determine index hash size.  Note that indexing skips the
5755.2.3 by John Arbash Meinel
Add a max_entries_per_source to DeltaIndex
395
       first byte so we subtract 1 to get the edge cases right.
396
     */
5755.2.1 by John Arbash Meinel
Set a maximum number of delta entries for a fulltext.
397
    stride = RABIN_WINDOW;
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
398
    num_entries = (src->size - 1)  / RABIN_WINDOW;
5755.2.3 by John Arbash Meinel
Add a max_entries_per_source to DeltaIndex
399
    if (max_entries > 0 && num_entries > max_entries) {
5755.2.1 by John Arbash Meinel
Set a maximum number of delta entries for a fulltext.
400
        /* Limit the max number of matching entries. This reduces the 'best'
401
         * possible match, but means we don't consume all of ram.
402
         */
5755.2.3 by John Arbash Meinel
Add a max_entries_per_source to DeltaIndex
403
        num_entries = max_entries;
404
        stride = (src->size - 1) / num_entries;
5755.2.1 by John Arbash Meinel
Set a maximum number of delta entries for a fulltext.
405
    }
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
406
    if (old != NULL)
407
        total_num_entries = num_entries + old->num_entries;
408
    else
409
        total_num_entries = num_entries;
3735.33.8 by John Arbash Meinel
Reverted back to the same hash width, and bumped EXTRA_NULLS to 3.
410
    hsize = total_num_entries / 4;
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
411
    for (i = 4; (1u << i) < hsize && i < 31; i++);
412
    hsize = 1 << i;
413
    hmask = hsize - 1;
3735.33.18 by John Arbash Meinel
*grow* the local hmask if it is smaller than expected, don't *shrink* it.
414
    if (old && old->hash_mask > hmask) {
3735.33.16 by John Arbash Meinel
Handle when our current packing is sub-optimal.
415
        hmask = old->hash_mask;
416
        hsize = hmask + 1;
417
    }
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
418
419
    /* allocate lookup index */
420
    memsize = sizeof(*hash) * hsize +
421
          sizeof(*entry) * total_num_entries;
422
    mem = malloc(memsize);
423
    if (!mem)
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
424
        return DELTA_OUT_OF_MEMORY;
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
425
    hash = mem;
426
    mem = hash + hsize;
427
    entry = mem;
428
429
    memset(hash, 0, hsize * sizeof(*hash));
430
431
    /* allocate an array to count hash num_entries */
432
    hash_count = calloc(hsize, sizeof(*hash_count));
433
    if (!hash_count) {
434
        free(hash);
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
435
        return DELTA_OUT_OF_MEMORY;
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
436
    }
437
438
    /* then populate the index for the new data */
439
    prev_val = ~0;
5755.2.1 by John Arbash Meinel
Set a maximum number of delta entries for a fulltext.
440
    for (data = buffer + num_entries * stride - RABIN_WINDOW;
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
441
         data >= buffer;
5755.2.1 by John Arbash Meinel
Set a maximum number of delta entries for a fulltext.
442
         data -= stride) {
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
443
        unsigned int val = 0;
444
        for (i = 1; i <= RABIN_WINDOW; i++)
445
            val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
446
        if (val == prev_val) {
447
            /* keep the lowest of consecutive identical blocks */
448
            entry[-1].entry.ptr = data + RABIN_WINDOW;
449
            --num_entries;
450
            --total_num_entries;
451
        } else {
452
            prev_val = val;
453
            i = val & hmask;
454
            entry->entry.ptr = data + RABIN_WINDOW;
455
            entry->entry.val = val;
456
            entry->entry.src = src;
457
            entry->next = hash[i];
458
            hash[i] = entry++;
459
            hash_count[i]++;
460
        }
461
    }
3735.33.11 by John Arbash Meinel
Shave 5s->3.3s in add_source by copying old entries across directly.
462
    /* TODO: It would be nice to limit_hash_buckets at a better time. */
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
463
    total_num_entries = limit_hash_buckets(hash, hash_count, hsize,
464
                                           total_num_entries);
465
    free(hash_count);
3735.33.11 by John Arbash Meinel
Shave 5s->3.3s in add_source by copying old entries across directly.
466
    index = pack_delta_index(hash, hsize, total_num_entries, old);
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
467
    free(hash);
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
468
    /* pack_delta_index only returns NULL on malloc failure */
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
469
    if (!index) {
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
470
        return DELTA_OUT_OF_MEMORY;
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
471
    }
3735.33.12 by John Arbash Meinel
Now we are able to weave 'add_source' into the existing index.
472
    index->last_src = src;
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
473
    *fresh = index;
474
    return DELTA_OK;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
475
}
476
3735.33.9 by John Arbash Meinel
When expanding an index put the entries into a hash.
477
/* Take some entries, and put them into a custom hash.
478
 * @param entries   A list of entries, sorted by position in file
479
 * @param num_entries   Length of entries
480
 * @param out_hsize     The maximum size of the hash, the final size will be
481
 *                      returned here
482
 */
483
struct index_entry_linked_list **
484
_put_entries_into_hash(struct index_entry *entries, unsigned int num_entries,
3735.33.10 by John Arbash Meinel
Fix a bug when there is more than one entry (increment out_entry).
485
                       unsigned int hsize)
3735.33.9 by John Arbash Meinel
When expanding an index put the entries into a hash.
486
{
3735.33.11 by John Arbash Meinel
Shave 5s->3.3s in add_source by copying old entries across directly.
487
    unsigned int hash_offset, hmask, memsize;
5755.2.3 by John Arbash Meinel
Add a max_entries_per_source to DeltaIndex
488
    struct index_entry *entry;
3735.33.9 by John Arbash Meinel
When expanding an index put the entries into a hash.
489
    struct index_entry_linked_list *out_entry, **hash;
490
    void *mem;
491
492
    hmask = hsize - 1;
493
494
    memsize = sizeof(*hash) * hsize +
495
          sizeof(*out_entry) * num_entries;
496
    mem = malloc(memsize);
497
    if (!mem)
498
        return NULL;
499
    hash = mem;
500
    mem = hash + hsize;
501
    out_entry = mem;
502
503
    memset(hash, 0, sizeof(*hash)*(hsize+1));
504
505
    /* We know that entries are in the order we want in the output, but they
506
     * aren't "grouped" by hash bucket yet.
507
     */
508
    for (entry = entries + num_entries - 1; entry >= entries; --entry) {
3735.33.10 by John Arbash Meinel
Fix a bug when there is more than one entry (increment out_entry).
509
        hash_offset = entry->val & hmask;
3735.33.9 by John Arbash Meinel
When expanding an index put the entries into a hash.
510
        out_entry->p_entry = entry;
3735.33.10 by John Arbash Meinel
Fix a bug when there is more than one entry (increment out_entry).
511
        out_entry->next = hash[hash_offset];
3735.33.9 by John Arbash Meinel
When expanding an index put the entries into a hash.
512
        /* TODO: Remove entries that have identical vals, or at least filter
513
         *       the map a little bit.
514
         * if (hash[i] != NULL) {
515
         * }
516
         */
3735.33.10 by John Arbash Meinel
Fix a bug when there is more than one entry (increment out_entry).
517
        hash[hash_offset] = out_entry;
518
        ++out_entry;
3735.33.9 by John Arbash Meinel
When expanding an index put the entries into a hash.
519
    }
520
    return hash;
521
}
522
3735.33.4 by John Arbash Meinel
The new layout is working.
523
524
struct delta_index *
525
create_index_from_old_and_new_entries(const struct delta_index *old_index,
526
                                      struct index_entry *entries,
527
                                      unsigned int num_entries)
528
{
3735.33.9 by John Arbash Meinel
When expanding an index put the entries into a hash.
529
    unsigned int i, j, hsize, hmask, total_num_entries;
3735.33.4 by John Arbash Meinel
The new layout is working.
530
    struct delta_index *index;
531
    struct index_entry *entry, *packed_entry, **packed_hash;
5755.2.3 by John Arbash Meinel
Add a max_entries_per_source to DeltaIndex
532
    struct index_entry null_entry = {0};
3735.33.4 by John Arbash Meinel
The new layout is working.
533
    void *mem;
534
    unsigned long memsize;
3735.33.9 by John Arbash Meinel
When expanding an index put the entries into a hash.
535
    struct index_entry_linked_list *unpacked_entry, **mini_hash;
3735.33.4 by John Arbash Meinel
The new layout is working.
536
537
    /* Determine index hash size.  Note that indexing skips the
538
       first byte to allow for optimizing the Rabin's polynomial
539
       initialization in create_delta(). */
540
    total_num_entries = num_entries + old_index->num_entries;
3735.33.8 by John Arbash Meinel
Reverted back to the same hash width, and bumped EXTRA_NULLS to 3.
541
    hsize = total_num_entries / 4;
3735.33.4 by John Arbash Meinel
The new layout is working.
542
    for (i = 4; (1u << i) < hsize && i < 31; i++);
543
    hsize = 1 << i;
544
    if (hsize < old_index->hash_mask) {
545
        /* For some reason, there was a code path that would actually *shrink*
546
         * the hash size. This screws with some later code, and in general, I
547
         * think it better to make the hash bigger, rather than smaller. So
548
         * we'll just force the size here.
549
         * Possibly done by create_delta_index running into a
550
         * limit_hash_buckets call, that ended up transitioning across a
551
         * power-of-2. The cause isn't 100% clear, though.
552
         */
553
        hsize = old_index->hash_mask + 1;
554
    }
555
    hmask = hsize - 1;
3735.33.15 by John Arbash Meinel
Remove the noisy debugging code. (down to 23.1s)
556
    // fprintf(stderr, "resizing index to insert %d entries into array"
557
    //                 " with %d entries: %x => %x\n",
558
    //         num_entries, old_index->num_entries, old_index->hash_mask, hmask);
3735.33.4 by John Arbash Meinel
The new layout is working.
559
560
    memsize = sizeof(*index)
561
        + sizeof(*packed_hash) * (hsize+1)
562
        + sizeof(*packed_entry) * (total_num_entries + hsize*EXTRA_NULLS);
563
    mem = malloc(memsize);
564
    if (!mem) {
565
        return NULL;
566
    }
567
    index = mem;
568
    index->memsize = memsize;
569
    index->hash_mask = hmask;
570
    index->num_entries = total_num_entries;
571
    index->last_src = old_index->last_src;
572
573
    mem = index->hash;
574
    packed_hash = mem;
575
    mem = packed_hash + (hsize+1);
576
    packed_entry = mem;
577
3735.33.10 by John Arbash Meinel
Fix a bug when there is more than one entry (increment out_entry).
578
    mini_hash = _put_entries_into_hash(entries, num_entries, hsize);
3735.33.9 by John Arbash Meinel
When expanding an index put the entries into a hash.
579
    if (mini_hash == NULL) {
580
        free(index);
581
        return NULL;
582
    }
3735.33.4 by John Arbash Meinel
The new layout is working.
583
    for (i = 0; i < hsize; i++) {
584
        /*
585
         * Coalesce all entries belonging in one hash bucket
586
         * into consecutive array entries.
587
         * The entries in old_index all come before 'entries'.
588
         */
589
        packed_hash[i] = packed_entry;
590
        /* Copy any of the old entries across */
591
        /* Would we rather use memcpy? */
592
        if (hmask == old_index->hash_mask) {
593
            for (entry = old_index->hash[i];
3735.33.6 by John Arbash Meinel
Increasing EXTRA_NULLS to 2 from 1 ups the hit rate
594
                 entry < old_index->hash[i+1] && entry->ptr != NULL;
3735.33.4 by John Arbash Meinel
The new layout is working.
595
                 ++entry) {
596
                assert((entry->val & hmask) == i);
597
                *packed_entry++ = *entry;
598
            }
599
        } else {
600
            /* If we resized the index from this action, all of the old values
601
             * will be found in the previous location, but they will end up
602
             * spread across the new locations.
603
             */
604
            j = i & old_index->hash_mask;
605
            for (entry = old_index->hash[j];
3735.33.6 by John Arbash Meinel
Increasing EXTRA_NULLS to 2 from 1 ups the hit rate
606
                 entry < old_index->hash[j+1] && entry->ptr != NULL;
3735.33.4 by John Arbash Meinel
The new layout is working.
607
                 ++entry) {
608
                assert((entry->val & old_index->hash_mask) == j);
609
                if ((entry->val & hmask) == i) {
610
                    /* Any entries not picked up here will be picked up on the
611
                     * next pass.
612
                     */
613
                    *packed_entry++ = *entry;
614
                }
615
            }
616
        }
617
        /* Now see if we need to insert any of the new entries.
618
         * Note that loop ends up O(hsize*num_entries), so we expect that
619
         * num_entries is always small.
620
         * We also help a little bit by collapsing the entry range when the
621
         * endpoints are inserted. However, an alternative would be to build a
622
         * quick hash lookup for just the new entries.
623
         * Testing shows that this list can easily get up to about 100
624
         * entries, the tradeoff is a malloc, 1 pass over the entries, copying
625
         * them into a sorted buffer, and a free() when done,
626
         */
3735.33.10 by John Arbash Meinel
Fix a bug when there is more than one entry (increment out_entry).
627
        for (unpacked_entry = mini_hash[i];
3735.33.9 by John Arbash Meinel
When expanding an index put the entries into a hash.
628
             unpacked_entry;
629
             unpacked_entry = unpacked_entry->next) {
3735.33.10 by John Arbash Meinel
Fix a bug when there is more than one entry (increment out_entry).
630
            assert((unpacked_entry->p_entry->val & hmask) == i);
631
            *packed_entry++ = *(unpacked_entry->p_entry);
3735.33.4 by John Arbash Meinel
The new layout is working.
632
        }
633
        /* Now insert some extra nulls */
634
        for (j = 0; j < EXTRA_NULLS; ++j) {
635
            *packed_entry++ = null_entry;
636
        }
637
    }
3735.33.9 by John Arbash Meinel
When expanding an index put the entries into a hash.
638
    free(mini_hash);
3735.33.4 by John Arbash Meinel
The new layout is working.
639
640
    /* Sentinel value to indicate the length of the last hash bucket */
641
    packed_hash[hsize] = packed_entry;
642
643
    if ((packed_entry - (struct index_entry *)mem)
644
        != (total_num_entries + hsize*EXTRA_NULLS)) {
645
        fprintf(stderr, "We expected %d entries, but created %d\n",
646
                total_num_entries + hsize*EXTRA_NULLS,
647
                (int)(packed_entry - (struct index_entry*)mem));
648
        fflush(stderr);
649
    }
650
    assert((packed_entry - (struct index_entry *)mem)
651
           == (total_num_entries + hsize * EXTRA_NULLS));
652
    index->last_entry = (packed_entry - 1);
653
    return index;
654
}
655
656
657
void
658
get_text(char buff[128], const unsigned char *ptr)
659
{
660
    unsigned int i;
661
    const unsigned char *start;
662
    unsigned char cmd;
663
    start = (ptr-RABIN_WINDOW-1);
664
    cmd = *(start);
665
    if (cmd < 0x80) {// This is likely to be an insert instruction
666
        if (cmd < RABIN_WINDOW) {
667
            cmd = RABIN_WINDOW;
668
        }
669
    } else {
670
        /* This was either a copy [should never be] or it
671
         * was a longer insert so the insert start happened at 16 more
672
         * bytes back.
673
         */
674
        cmd = RABIN_WINDOW + 1;
675
    }
676
    if (cmd > 60) {
677
        cmd = 60; /* Be friendly to 80char terms */
678
    }
679
    /* Copy the 1 byte command, and 4 bytes after the insert */
680
    cmd += 5;
681
    memcpy(buff, start, cmd);
682
    buff[cmd] = 0;
683
    for (i = 0; i < cmd; ++i) {
684
        if (buff[i] == '\n') {
685
            buff[i] = 'N';
686
        } else if (buff[i] == '\t') {
687
            buff[i] = 'T';
688
        }
689
    }
690
}
691
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
692
delta_result
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
693
create_delta_index_from_delta(const struct source_info *src,
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
694
                              struct delta_index *old_index,
695
                              struct delta_index **fresh)
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
696
{
3735.33.4 by John Arbash Meinel
The new layout is working.
697
    unsigned int i, num_entries, max_num_entries, prev_val, num_inserted;
698
    unsigned int hash_offset;
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
699
    const unsigned char *data, *buffer, *top;
700
    unsigned char cmd;
3735.33.4 by John Arbash Meinel
The new layout is working.
701
    struct delta_index *new_index;
4634.106.2 by John Arbash Meinel
Restructure the internals to be a bit more understandable.
702
    struct index_entry *entry, *entries;
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
703
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
704
    if (!old_index)
705
        return DELTA_INDEX_NEEDED;
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
706
    if (!src->buf || !src->size)
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
707
        return DELTA_SOURCE_EMPTY;
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
708
    buffer = src->buf;
709
    top = buffer + src->size;
710
711
    /* Determine index hash size.  Note that indexing skips the
712
       first byte to allow for optimizing the Rabin's polynomial
713
       initialization in create_delta().
714
       This computes the maximum number of entries that could be held. The
715
       actual number will be recomputed during processing.
716
       */
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
717
3735.33.4 by John Arbash Meinel
The new layout is working.
718
    max_num_entries = (src->size - 1)  / RABIN_WINDOW;
719
720
    /* allocate an array to hold whatever entries we find */
721
    entries = malloc(sizeof(*entry) * max_num_entries);
722
    if (!entries) /* malloc failure */
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
723
        return DELTA_OUT_OF_MEMORY;
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
724
725
    /* then populate the index for the new data */
726
    prev_val = ~0;
727
    data = buffer;
728
    /* target size */
4582.2.1 by John Arbash Meinel
Use a cast to improve compiler conformance.
729
    /* get_delta_hdr_size doesn't mutate the content, just moves the
730
     * start-of-data pointer, so it is safe to do the cast.
731
     */
732
    get_delta_hdr_size((unsigned char**)&data, top);
3735.33.4 by John Arbash Meinel
The new layout is working.
733
    entry = entries; /* start at the first slot */
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
734
    num_entries = 0; /* calculate the real number of entries */
735
    while (data < top) {
736
        cmd = *data++;
737
        if (cmd & 0x80) {
738
            /* Copy instruction, skip it */
739
            if (cmd & 0x01) data++;
740
            if (cmd & 0x02) data++;
741
            if (cmd & 0x04) data++;
742
            if (cmd & 0x08) data++;
743
            if (cmd & 0x10) data++;
744
            if (cmd & 0x20) data++;
745
            if (cmd & 0x40) data++;
746
        } else if (cmd) {
747
            /* Insert instruction, we want to index these bytes */
748
            if (data + cmd > top) {
749
                /* Invalid insert, not enough bytes in the delta */
750
                break;
751
            }
3735.33.5 by John Arbash Meinel
Restoring the debugging for now.
752
            /* The create_delta code requires a match at least 4 characters
753
             * (including only the last char of the RABIN_WINDOW) before it
754
             * will consider it something worth copying rather than inserting.
755
             * So we don't want to index anything that we know won't ever be a
756
             * match.
757
             */
758
            for (; cmd > RABIN_WINDOW + 3; cmd -= RABIN_WINDOW,
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
759
                                       data += RABIN_WINDOW) {
760
                unsigned int val = 0;
761
                for (i = 1; i <= RABIN_WINDOW; i++)
762
                    val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
763
                if (val != prev_val) {
764
                    /* Only keep the first of consecutive data */
765
                    prev_val = val;
3735.33.4 by John Arbash Meinel
The new layout is working.
766
                    num_entries++;
767
                    entry->ptr = data + RABIN_WINDOW;
768
                    entry->val = val;
769
                    entry->src = src;
3735.33.2 by John Arbash Meinel
Revert some of the previous work.
770
                    entry++;
3735.33.4 by John Arbash Meinel
The new layout is working.
771
                    if (num_entries > max_num_entries) {
772
                        /* We ran out of entry room, something is really wrong
773
                         */
774
                        break;
775
                    }
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
776
                }
777
            }
778
            /* Move the data pointer by whatever remainder is left */
779
            data += cmd;
780
        } else {
781
            /*
782
             * cmd == 0 is reserved for future encoding
783
             * extensions. In the mean time we must fail when
784
             * encountering them (might be data corruption).
785
             */
786
            break;
787
        }
788
    }
789
    if (data != top) {
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
790
        /* The source_info data passed was corrupted or otherwise invalid */
3735.33.4 by John Arbash Meinel
The new layout is working.
791
        free(entries);
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
792
        return DELTA_SOURCE_BAD;
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
793
    }
794
    if (num_entries == 0) {
795
        /** Nothing to index **/
3735.33.4 by John Arbash Meinel
The new layout is working.
796
        free(entries);
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
797
        *fresh = old_index;
798
        return DELTA_OK;
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
799
    }
3735.33.4 by John Arbash Meinel
The new layout is working.
800
    old_index->last_src = src;
801
    /* See if we can fill in these values into the holes in the array */
802
    entry = entries;
803
    num_inserted = 0;
804
    for (; num_entries > 0; --num_entries, ++entry) {
4634.106.2 by John Arbash Meinel
Restructure the internals to be a bit more understandable.
805
        struct index_entry *next_bucket_entry, *cur_entry, *bucket_first_entry;
3735.33.4 by John Arbash Meinel
The new layout is working.
806
        hash_offset = (entry->val & old_index->hash_mask);
807
        /* The basic structure is a hash => packed_entries that fit in that
808
         * hash bucket. Things are structured such that the hash-pointers are
809
         * strictly ordered. So we start by pointing to the next pointer, and
810
         * walk back until we stop getting NULL targets, and then go back
811
         * forward. If there are no NULL targets, then we know because
812
         * entry->ptr will not be NULL.
813
         */
4634.106.2 by John Arbash Meinel
Restructure the internals to be a bit more understandable.
814
        // The start of the next bucket, this may point past the end of the
815
        // entry table if hash_offset is the last bucket.
816
        next_bucket_entry = old_index->hash[hash_offset + 1];
817
        // First entry in this bucket
818
        bucket_first_entry = old_index->hash[hash_offset];
819
        cur_entry = next_bucket_entry - 1;
820
        while (cur_entry->ptr == NULL && cur_entry >= bucket_first_entry) {
821
            cur_entry--;
3735.33.4 by John Arbash Meinel
The new layout is working.
822
        }
4634.106.2 by John Arbash Meinel
Restructure the internals to be a bit more understandable.
823
        // cur_entry now either points at the first NULL, or it points to
824
        // next_bucket_entry if there were no blank spots.
825
        cur_entry++;
826
        if (cur_entry >= next_bucket_entry || cur_entry->ptr != NULL) {
3735.33.6 by John Arbash Meinel
Increasing EXTRA_NULLS to 2 from 1 ups the hit rate
827
            /* There is no room for this entry, we have to resize */
828
            // char buff[128];
829
            // get_text(buff, entry->ptr);
830
            // fprintf(stderr, "Failed to find an opening @%x for %8x:\n '%s'\n",
831
            //         hash_offset, entry->val, buff);
832
            // for (old_entry = old_index->hash[hash_offset];
833
            //      old_entry < old_index->hash[hash_offset+1];
834
            //      ++old_entry) {
835
            //     get_text(buff, old_entry->ptr);
836
            //     fprintf(stderr, "  [%2d] %8x %8x: '%s'\n",
837
            //             (int)(old_entry - old_index->hash[hash_offset]),
838
            //             old_entry->val, old_entry->ptr, buff);
839
            // }
3735.33.4 by John Arbash Meinel
The new layout is working.
840
            break;
841
        }
842
        num_inserted++;
4634.106.2 by John Arbash Meinel
Restructure the internals to be a bit more understandable.
843
        *cur_entry = *entry;
3735.33.4 by John Arbash Meinel
The new layout is working.
844
        /* For entries which we *do* manage to insert into old_index, we don't
845
         * want them double copied into the final output.
846
         */
847
        old_index->num_entries++;
848
    }
849
    if (num_entries > 0) {
850
        /* We couldn't fit the new entries into the old index, so allocate a
851
         * new one, and fill it with stuff.
852
         */
3735.33.6 by John Arbash Meinel
Increasing EXTRA_NULLS to 2 from 1 ups the hit rate
853
        // fprintf(stderr, "inserted %d before resize\n", num_inserted);
3735.33.4 by John Arbash Meinel
The new layout is working.
854
        new_index = create_index_from_old_and_new_entries(old_index,
855
            entry, num_entries);
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
856
    } else {
5698.2.3 by Martin
Change create_delta_index_from_delta too so NULL can be treated as MemoryError
857
        new_index = old_index;
3735.33.15 by John Arbash Meinel
Remove the noisy debugging code. (down to 23.1s)
858
        // fprintf(stderr, "inserted %d without resizing\n", num_inserted);
3735.33.4 by John Arbash Meinel
The new layout is working.
859
    }
860
    free(entries);
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
861
    /* create_index_from_old_and_new_entries returns NULL on malloc failure */
862
    if (!new_index)
863
        return DELTA_OUT_OF_MEMORY;
864
    *fresh = new_index;
865
    return DELTA_OK;
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
866
}
867
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
868
void free_delta_index(struct delta_index *index)
869
{
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
870
    free(index);
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
871
}
872
5361.2.2 by John Arbash Meinel
Fix some signed integer warnings in diff-delta.c
873
unsigned long
874
sizeof_delta_index(struct delta_index *index)
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
875
{
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
876
    if (index)
877
        return index->memsize;
878
    else
879
        return 0;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
880
}
881
882
/*
883
 * The maximum size for any opcode sequence, including the initial header
884
 * plus Rabin window plus biggest copy.
885
 */
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
886
#define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7)
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
887
5698.2.6 by Martin
Also adapt create_delta to the return code interface as it uses malloc
888
delta_result
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
889
create_delta(const struct delta_index *index,
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
890
             const void *trg_buf, unsigned long trg_size,
5698.2.6 by Martin
Also adapt create_delta to the return code interface as it uses malloc
891
             unsigned long *delta_size, unsigned long max_size,
892
             void **delta_data)
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
893
{
5361.2.4 by John Arbash Meinel
Use signed ints to avoid the compiler warning.
894
    unsigned int i, outpos, outsize, moff, val;
895
    int msize;
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
896
    const struct source_info *msource;
897
    int inscnt;
898
    const unsigned char *ref_data, *ref_top, *data, *top;
899
    unsigned char *out;
900
    unsigned long source_size;
901
902
    if (!trg_buf || !trg_size)
5698.2.6 by Martin
Also adapt create_delta to the return code interface as it uses malloc
903
        return DELTA_BUFFER_EMPTY;
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
904
    if (index == NULL)
5698.2.6 by Martin
Also adapt create_delta to the return code interface as it uses malloc
905
        return DELTA_INDEX_NEEDED;
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
906
907
    outpos = 0;
908
    outsize = 8192;
909
    if (max_size && outsize >= max_size)
910
        outsize = max_size + MAX_OP_SIZE + 1;
911
    out = malloc(outsize);
912
    if (!out)
5698.2.6 by Martin
Also adapt create_delta to the return code interface as it uses malloc
913
        return DELTA_OUT_OF_MEMORY;
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
914
915
    source_size = index->last_src->size + index->last_src->agg_offset;
916
917
    /* store target buffer size */
918
    i = trg_size;
919
    while (i >= 0x80) {
920
        out[outpos++] = i | 0x80;
921
        i >>= 7;
922
    }
923
    out[outpos++] = i;
924
925
    data = trg_buf;
926
    top = (const unsigned char *) trg_buf + trg_size;
927
928
    /* Start the matching by filling out with a simple 'insert' instruction, of
929
     * the first RABIN_WINDOW bytes of the input.
930
     */
931
    outpos++; /* leave a byte for the insert command */
932
    val = 0;
933
    for (i = 0; i < RABIN_WINDOW && data < top; i++, data++) {
934
        out[outpos++] = *data;
935
        val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
936
    }
937
    /* we are now setup with an insert of 'i' bytes and val contains the RABIN
938
     * hash for those bytes, and data points to the RABIN_WINDOW+1 byte of
939
     * input.
940
     */
941
    inscnt = i;
942
943
    moff = 0;
944
    msize = 0;
945
    msource = NULL;
946
    while (data < top) {
947
        if (msize < 4096) {
948
            /* we don't have a 'worthy enough' match yet, so let's look for
949
             * one.
950
             */
951
            struct index_entry *entry;
952
            /* Shift the window by one byte. */
953
            val ^= U[data[-RABIN_WINDOW]];
954
            val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
955
            i = val & index->hash_mask;
956
            /* TODO: When using multiple indexes like this, the hash tables
957
             *       mapping val => index_entry become less efficient.
958
             *       You end up getting a lot more collisions in the hash,
959
             *       which doesn't actually lead to a entry->val match.
960
             */
3735.33.1 by John Arbash Meinel
(broken, in progress), change the data structures to allow for inserting small deltas.
961
            for (entry = index->hash[i];
962
                 entry < index->hash[i+1] && entry->src != NULL;
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
963
                 entry++) {
964
                const unsigned char *ref;
965
                const unsigned char *src;
5361.2.4 by John Arbash Meinel
Use signed ints to avoid the compiler warning.
966
                int ref_size;
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
967
                if (entry->val != val)
968
                    continue;
969
                ref = entry->ptr;
970
                src = data;
971
                ref_data = entry->src->buf;
972
                ref_top = ref_data + entry->src->size;
973
                ref_size = ref_top - ref;
974
                /* ref_size is the longest possible match that we could make
975
                 * here. If ref_size <= msize, then we know that we cannot
976
                 * match more bytes with this location that we have already
977
                 * matched.
978
                 */
5361.2.4 by John Arbash Meinel
Use signed ints to avoid the compiler warning.
979
                if (ref_size > (top - src))
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
980
                    ref_size = top - src;
981
                if (ref_size <= msize)
982
                    break;
983
                /* See how many bytes actually match at this location. */
984
                while (ref_size-- && *src++ == *ref)
985
                    ref++;
5361.2.4 by John Arbash Meinel
Use signed ints to avoid the compiler warning.
986
                if (msize < (ref - entry->ptr)) {
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
987
                    /* this is our best match so far */
988
                    msize = ref - entry->ptr;
989
                    msource = entry->src;
990
                    moff = entry->ptr - ref_data;
991
                    if (msize >= 4096) /* good enough */
992
                        break;
993
                }
994
            }
995
        }
996
997
        if (msize < 4) {
998
            /* The best match right now is less than 4 bytes long. So just add
999
             * the current byte to the insert instruction. Increment the insert
1000
             * counter, and copy the byte of data into the output buffer.
1001
             */
1002
            if (!inscnt)
1003
                outpos++;
1004
            out[outpos++] = *data++;
1005
            inscnt++;
1006
            if (inscnt == 0x7f) {
1007
                /* We have a max length insert instruction, finalize it in the
1008
                 * output.
1009
                 */
1010
                out[outpos - inscnt - 1] = inscnt;
1011
                inscnt = 0;
1012
            }
1013
            msize = 0;
1014
        } else {
1015
            unsigned int left;
1016
            unsigned char *op;
1017
1018
            if (inscnt) {
1019
                ref_data = msource->buf;
1020
                while (moff && ref_data[moff-1] == data[-1]) {
1021
                    /* we can match one byte back */
1022
                    msize++;
1023
                    moff--;
1024
                    data--;
1025
                    outpos--;
1026
                    if (--inscnt)
1027
                        continue;
1028
                    outpos--;  /* remove count slot */
1029
                    inscnt--;  /* make it -1 */
1030
                    break;
1031
                }
1032
                out[outpos - inscnt - 1] = inscnt;
1033
                inscnt = 0;
1034
            }
1035
1036
            /* A copy op is currently limited to 64KB (pack v2) */
1037
            left = (msize < 0x10000) ? 0 : (msize - 0x10000);
1038
            msize -= left;
1039
1040
            op = out + outpos++;
1041
            i = 0x80;
1042
1043
            /* moff is the offset in the local structure, for encoding, we need
1044
             * to push it into the global offset
1045
             */
1046
            assert(moff < msource->size);
1047
            moff += msource->agg_offset;
1048
            assert(moff + msize <= source_size);
1049
            if (moff & 0x000000ff)
1050
                out[outpos++] = moff >> 0,  i |= 0x01;
1051
            if (moff & 0x0000ff00)
1052
                out[outpos++] = moff >> 8,  i |= 0x02;
1053
            if (moff & 0x00ff0000)
1054
                out[outpos++] = moff >> 16, i |= 0x04;
1055
            if (moff & 0xff000000)
1056
                out[outpos++] = moff >> 24, i |= 0x08;
1057
            /* Put it back into local coordinates, in case we have multiple
1058
             * copies in a row.
1059
             */
1060
            moff -= msource->agg_offset;
1061
1062
            if (msize & 0x00ff)
1063
                out[outpos++] = msize >> 0, i |= 0x10;
1064
            if (msize & 0xff00)
1065
                out[outpos++] = msize >> 8, i |= 0x20;
1066
1067
            *op = i;
1068
1069
            data += msize;
1070
            moff += msize;
1071
            msize = left;
1072
1073
            if (msize < 4096) {
1074
                int j;
1075
                val = 0;
1076
                for (j = -RABIN_WINDOW; j < 0; j++)
1077
                    val = ((val << 8) | data[j])
1078
                          ^ T[val >> RABIN_SHIFT];
1079
            }
1080
        }
1081
1082
        if (outpos >= outsize - MAX_OP_SIZE) {
1083
            void *tmp = out;
1084
            outsize = outsize * 3 / 2;
1085
            if (max_size && outsize >= max_size)
1086
                outsize = max_size + MAX_OP_SIZE + 1;
1087
            if (max_size && outpos > max_size)
1088
                break;
1089
            out = realloc(out, outsize);
1090
            if (!out) {
1091
                free(tmp);
5698.2.6 by Martin
Also adapt create_delta to the return code interface as it uses malloc
1092
                return DELTA_OUT_OF_MEMORY;
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
1093
            }
1094
        }
1095
    }
1096
1097
    if (inscnt)
1098
        out[outpos - inscnt - 1] = inscnt;
1099
1100
    if (max_size && outpos > max_size) {
1101
        free(out);
5698.2.6 by Martin
Also adapt create_delta to the return code interface as it uses malloc
1102
        return DELTA_SIZE_TOO_BIG;
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
1103
    }
1104
1105
    *delta_size = outpos;
5698.2.6 by Martin
Also adapt create_delta to the return code interface as it uses malloc
1106
    *delta_data = out;
1107
    return DELTA_OK;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
1108
}
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
1109
5755.2.3 by John Arbash Meinel
Add a max_entries_per_source to DeltaIndex
1110
1111
int
1112
get_entry_summary(const struct delta_index *index, int pos,
1113
                  unsigned int *text_offset, unsigned int *hash_val)
1114
{
1115
    int hsize;
1116
    const struct index_entry *entry;
1117
    const struct index_entry *start_of_entries;
1118
    unsigned int offset;
1119
    if (pos < 0 || text_offset == NULL || hash_val == NULL
1120
        || index == NULL)
1121
    {
1122
        return 0;
1123
    }
1124
    hsize = index->hash_mask + 1;
1125
    start_of_entries = (struct index_entry *)(((struct index_entry **)index->hash) + (hsize + 1));
1126
    entry = start_of_entries + pos;
1127
    if (entry > index->last_entry) {
1128
        return 0;
1129
    }
1130
    if (entry->ptr == NULL) {
1131
        *text_offset = 0;
1132
        *hash_val = 0;
1133
    } else {
1134
        offset = entry->src->agg_offset;
1135
        offset += (entry->ptr - ((unsigned char *)entry->src->buf));
1136
        *text_offset = offset;
1137
        *hash_val = entry->val;
1138
    }
1139
    return 1;
1140
}
1141
1142
1143
int
1144
get_hash_offset(const struct delta_index *index, int pos,
1145
                unsigned int *entry_offset)
1146
{
1147
    int hsize;
1148
    const struct index_entry *entry;
1149
    const struct index_entry *start_of_entries;
1150
    if (pos < 0 || index == NULL || entry_offset == NULL)
1151
    {
1152
        return 0;
1153
    }
1154
    hsize = index->hash_mask + 1;
1155
    start_of_entries = (struct index_entry *)(((struct index_entry **)index->hash) + (hsize + 1));
1156
    if (pos >= hsize) {
1157
        return 0;
1158
    }
1159
    entry = index->hash[pos];
1160
    if (entry == NULL) {
1161
        *entry_offset = -1;
1162
    } else {
1163
        *entry_offset = (entry - start_of_entries);
1164
    }
1165
    return 1;
1166
}
1167
1168
1169
unsigned int
1170
rabin_hash(const unsigned char *data)
1171
{
1172
    int i;
1173
    unsigned int val = 0;
1174
    for (i = 0; i < RABIN_WINDOW; i++)
1175
        val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
1176
    return val;
1177
}
1178
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
1179
/* vim: et ts=4 sw=4 sts=4
0.23.36 by John Arbash Meinel
Track down a memory leak in the refactored diff-delta.c code.
1180
 */