/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
1
/*
2
 * diff-delta.c: generate a delta between two buffers
3
 *
4
 * This code was greatly inspired by parts of LibXDiff from Davide Libenzi
5
 * http://www.xmailserver.org/xdiff-lib.html
6
 *
7
 * Rewritten for GIT by Nicolas Pitre <nico@cam.org>, (C) 2005-2007
8
 *
9
 * This code is free software; you can redistribute it and/or modify
10
 * it under the terms of the GNU General Public License version 2 as
11
 * published by the Free Software Foundation.
12
 */
13
14
#include "delta.h"
0.23.5 by John Arbash Meinel
Minor changes to get diff-delta.c and patch-delta.c to compile.
15
#include <assert.h>
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
16
17
/* maximum hash entry list for the same hash bucket */
18
#define HASH_LIMIT 64
19
20
#define RABIN_SHIFT 23
21
#define RABIN_WINDOW 16
22
23
static const unsigned int T[256] = {
24
	0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344,
25
	0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259,
26
	0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85,
27
	0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2,
28
	0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a,
29
	0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db,
30
	0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753,
31
	0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964,
32
	0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8,
33
	0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5,
34
	0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81,
35
	0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6,
36
	0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e,
37
	0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77,
38
	0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff,
39
	0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8,
40
	0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc,
41
	0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1,
42
	0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d,
43
	0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a,
44
	0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2,
45
	0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02,
46
	0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a,
47
	0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd,
48
	0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61,
49
	0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c,
50
	0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08,
51
	0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f,
52
	0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7,
53
	0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe,
54
	0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76,
55
	0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141,
56
	0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65,
57
	0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78,
58
	0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4,
59
	0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93,
60
	0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b,
61
	0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa,
62
	0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872,
63
	0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645,
64
	0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99,
65
	0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84,
66
	0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811
67
};
68
69
static const unsigned int U[256] = {
70
	0x00000000, 0x7eb5200d, 0x5633f4cb, 0x2886d4c6, 0x073e5d47, 0x798b7d4a,
71
	0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48,
72
	0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511,
73
	0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d,
74
	0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8,
75
	0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe,
76
	0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb,
77
	0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937,
78
	0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e,
79
	0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c,
80
	0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d,
81
	0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1,
82
	0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4,
83
	0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa,
84
	0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef,
85
	0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263,
86
	0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302,
87
	0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000,
88
	0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59,
89
	0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5,
90
	0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90,
91
	0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7,
92
	0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2,
93
	0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e,
94
	0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467,
95
	0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765,
96
	0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604,
97
	0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88,
98
	0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd,
99
	0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3,
100
	0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996,
101
	0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a,
102
	0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b,
103
	0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609,
104
	0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50,
105
	0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc,
106
	0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99,
107
	0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf,
108
	0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa,
109
	0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176,
110
	0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f,
111
	0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d,
112
	0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a
113
};
114
115
struct index_entry {
116
	const unsigned char *ptr;
117
	unsigned int val;
118
};
119
120
struct unpacked_index_entry {
121
	struct index_entry entry;
122
	struct unpacked_index_entry *next;
123
};
124
125
struct delta_index {
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
126
	unsigned long memsize; /* Total bytes pointed to by this index */
127
	const void *src_buf; /* Pointer to the beginning of source data */
128
	unsigned long src_size; /* Total length of source data */
129
	unsigned long agg_src_offset; /* Start of source data as part of the
130
									 aggregate source */
131
	unsigned int hash_mask; /* val & hash_mask gives the hash index for a given
132
							   entry */
0.23.5 by John Arbash Meinel
Minor changes to get diff-delta.c and patch-delta.c to compile.
133
	struct index_entry *hash[];
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
134
};
135
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
136
static unsigned int
137
limit_hash_buckets(struct unpacked_index_entry **hash,
138
				   unsigned int *hash_count, unsigned int hsize,
139
				   unsigned int entries)
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
140
{
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
141
	struct unpacked_index_entry *entry;
142
	unsigned int i;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
143
	/*
144
	 * Determine a limit on the number of entries in the same hash
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
145
	 * bucket.	This guards us against pathological data sets causing
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
146
	 * really bad hash distribution with most entries in the same hash
147
	 * bucket that would bring us to O(m*n) computing costs (m and n
148
	 * corresponding to reference and target buffer sizes).
149
	 *
150
	 * Make sure none of the hash buckets has more entries than
151
	 * we're willing to test.  Otherwise we cull the entry list
152
	 * uniformly to still preserve a good repartition across
153
	 * the reference buffer.
154
	 */
155
	for (i = 0; i < hsize; i++) {
156
		int acc;
157
158
		if (hash_count[i] <= HASH_LIMIT)
159
			continue;
160
161
		/* We leave exactly HASH_LIMIT entries in the bucket */
162
		entries -= hash_count[i] - HASH_LIMIT;
163
164
		entry = hash[i];
165
		acc = 0;
166
167
		/*
168
		 * Assume that this loop is gone through exactly
169
		 * HASH_LIMIT times and is entered and left with
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
170
		 * acc==0.	So the first statement in the loop
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
171
		 * contributes (hash_count[i]-HASH_LIMIT)*HASH_LIMIT
172
		 * to the accumulator, and the inner loop consequently
173
		 * is run (hash_count[i]-HASH_LIMIT) times, removing
174
		 * one element from the list each time.  Since acc
175
		 * balances out to 0 at the final run, the inner loop
176
		 * body can't be left with entry==NULL.  So we indeed
177
		 * encounter entry==NULL in the outer loop only.
178
		 */
179
		do {
180
			acc += hash_count[i] - HASH_LIMIT;
181
			if (acc > 0) {
182
				struct unpacked_index_entry *keep = entry;
183
				do {
184
					entry = entry->next;
185
					acc -= HASH_LIMIT;
186
				} while (acc > 0);
187
				keep->next = entry->next;
188
			}
189
			entry = entry->next;
190
		} while (entry);
191
	}
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
192
	return entries;
193
}
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
194
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
195
static struct delta_index *
196
pack_delta_index(struct unpacked_index_entry **hash, unsigned int hsize,
197
				 unsigned int entries)
198
{
199
	unsigned int i, memsize;
200
	struct unpacked_index_entry *entry;
201
	struct delta_index *index;
202
	struct index_entry *packed_entry, **packed_hash;
203
	void *mem;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
204
	/*
205
	 * Now create the packed index in array form
206
	 * rather than linked lists.
207
	 */
208
	memsize = sizeof(*index)
209
		+ sizeof(*packed_hash) * (hsize+1)
210
		+ sizeof(*packed_entry) * entries;
211
	mem = malloc(memsize);
212
	if (!mem) {
213
		return NULL;
214
	}
215
216
	index = mem;
217
	index->memsize = memsize;
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
218
	index->hash_mask = hsize - 1;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
219
220
	mem = index->hash;
221
	packed_hash = mem;
222
	mem = packed_hash + (hsize+1);
223
	packed_entry = mem;
224
225
	for (i = 0; i < hsize; i++) {
226
		/*
227
		 * Coalesce all entries belonging to one linked list
228
		 * into consecutive array entries.
229
		 */
230
		packed_hash[i] = packed_entry;
231
		for (entry = hash[i]; entry; entry = entry->next)
232
			*packed_entry++ = entry->entry;
233
	}
234
235
	/* Sentinel value to indicate the length of the last hash bucket */
236
	packed_hash[hsize] = packed_entry;
237
238
	assert(packed_entry - (struct index_entry *)mem == entries);
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
239
	return index;
240
}
241
242
243
struct delta_index * create_delta_index(const void *buf, unsigned long bufsize,
244
										unsigned long agg_src_offset)
245
{
246
	unsigned int i, hsize, hmask, entries, prev_val, *hash_count;
247
	const unsigned char *data, *buffer = buf;
248
	struct delta_index *index;
249
	struct unpacked_index_entry *entry, **hash;
250
	void *mem;
251
	unsigned long memsize;
252
253
	if (!buf || !bufsize)
254
		return NULL;
255
256
	/* Determine index hash size.  Note that indexing skips the
257
	   first byte to allow for optimizing the Rabin's polynomial
258
	   initialization in create_delta(). */
259
	entries = (bufsize - 1)  / RABIN_WINDOW;
260
	hsize = entries / 4;
261
	for (i = 4; (1u << i) < hsize && i < 31; i++);
262
	hsize = 1 << i;
263
	hmask = hsize - 1;
264
265
	/* allocate lookup index */
266
	memsize = sizeof(*hash) * hsize +
267
		  sizeof(*entry) * entries;
268
	mem = malloc(memsize);
269
	if (!mem)
270
		return NULL;
271
	hash = mem;
272
	mem = hash + hsize;
273
	entry = mem;
274
275
	memset(hash, 0, hsize * sizeof(*hash));
276
277
	/* allocate an array to count hash entries */
278
	hash_count = calloc(hsize, sizeof(*hash_count));
279
	if (!hash_count) {
280
		free(hash);
281
		return NULL;
282
	}
283
284
	/* then populate the index */
285
	prev_val = ~0;
286
	for (data = buffer + entries * RABIN_WINDOW - RABIN_WINDOW;
287
		 data >= buffer;
288
		 data -= RABIN_WINDOW) {
289
		unsigned int val = 0;
290
		for (i = 1; i <= RABIN_WINDOW; i++)
291
			val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
292
		if (val == prev_val) {
293
			/* keep the lowest of consecutive identical blocks */
294
			entry[-1].entry.ptr = data + RABIN_WINDOW;
295
			--entries;
296
		} else {
297
			prev_val = val;
298
			i = val & hmask;
299
			entry->entry.ptr = data + RABIN_WINDOW;
300
			entry->entry.val = val;
301
			entry->next = hash[i];
302
			hash[i] = entry++;
303
			hash_count[i]++;
304
		}
305
	}
306
307
	entries = limit_hash_buckets(hash, hash_count, hsize, entries);
308
	free(hash_count);
309
	index = pack_delta_index(hash, hsize, entries);
0.23.36 by John Arbash Meinel
Track down a memory leak in the refactored diff-delta.c code.
310
	free(hash);
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
311
	if (!index) {
312
		return NULL;
313
	}
314
	index->src_buf = buf;
315
	index->src_size = bufsize;
316
	index->agg_src_offset = agg_src_offset;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
317
	return index;
318
}
319
320
void free_delta_index(struct delta_index *index)
321
{
322
	free(index);
323
}
324
325
unsigned long sizeof_delta_index(struct delta_index *index)
326
{
327
	if (index)
328
		return index->memsize;
329
	else
330
		return 0;
331
}
332
333
/*
334
 * The maximum size for any opcode sequence, including the initial header
335
 * plus Rabin window plus biggest copy.
336
 */
337
#define MAX_OP_SIZE	(5 + 5 + 1 + RABIN_WINDOW + 7)
338
339
void *
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
340
create_delta(struct delta_index **indexes,
341
			 unsigned int num_indexes,
342
			 const void *trg_buf, unsigned long trg_size,
343
			 unsigned long *delta_size, unsigned long max_size)
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
344
{
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
345
	unsigned int i, j, outpos, outsize, moff, msize, val;
346
	const struct delta_index *index, *mindex;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
347
	int inscnt;
348
	const unsigned char *ref_data, *ref_top, *data, *top;
349
	unsigned char *out;
350
351
	if (!trg_buf || !trg_size)
352
		return NULL;
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
353
	if (num_indexes == 0)
354
		return NULL;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
355
356
	outpos = 0;
357
	outsize = 8192;
358
	if (max_size && outsize >= max_size)
359
		outsize = max_size + MAX_OP_SIZE + 1;
360
	out = malloc(outsize);
361
	if (!out)
362
		return NULL;
363
364
	/* store reference buffer size */
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
365
	i = 0;
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
366
	index = indexes[0];
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
367
	for (j = 0; j < num_indexes; ++j) {
368
		index = indexes[j];
369
		i += index->src_size;
370
	}
0.23.26 by John Arbash Meinel
We now start to make use of the ability to extend the delta index
371
	assert(i <= index->src_size + index->agg_src_offset);
372
	i = index->src_size + index->agg_src_offset;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
373
	while (i >= 0x80) {
374
		out[outpos++] = i | 0x80;
375
		i >>= 7;
376
	}
377
	out[outpos++] = i;
378
379
	/* store target buffer size */
380
	i = trg_size;
381
	while (i >= 0x80) {
382
		out[outpos++] = i | 0x80;
383
		i >>= 7;
384
	}
385
	out[outpos++] = i;
386
387
	data = trg_buf;
388
	top = (const unsigned char *) trg_buf + trg_size;
389
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
390
	/* Start the matching by filling out with a simple 'insert' instruction, of
391
	 * the first RABIN_WINDOW bytes of the input.
392
	 */
393
	outpos++; /* leave a byte for the insert command */
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
394
	val = 0;
395
	for (i = 0; i < RABIN_WINDOW && data < top; i++, data++) {
396
		out[outpos++] = *data;
397
		val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
398
	}
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
399
	/* we are now setup with an insert of 'i' bytes and val contains the RABIN
400
	 * hash for those bytes, and data points to the RABIN_WINDOW+1 byte of
401
	 * input.
402
	 */
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
403
	inscnt = i;
404
405
	moff = 0;
406
	msize = 0;
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
407
	mindex = NULL;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
408
	while (data < top) {
409
		if (msize < 4096) {
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
410
			/* we don't have a 'worthy enough' match yet, so let's look for
411
			 * one.
412
			 */
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
413
			struct index_entry *entry;
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
414
			/* Shift the window by one byte. */
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
415
			val ^= U[data[-RABIN_WINDOW]];
416
			val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
417
			for (j = 0; j < num_indexes; ++j) {
418
				index = indexes[j];
419
				i = val & index->hash_mask;
420
				ref_top = index->src_buf + index->src_size;
421
				ref_data = index->src_buf;
0.23.31 by John Arbash Meinel
Add a bit of comments about things to do.
422
				/* TODO: When using multiple indexes like this, the hash tables
423
				 *		 mapping val => index_entry become less efficient.
424
				 *		 You end up getting a lot more collisions in the hash,
425
				 *		 which doesn't actually lead to a entry->val match.
426
				 */
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
427
				for (entry = index->hash[i]; entry < index->hash[i+1];
428
					 entry++) {
429
					const unsigned char *ref = entry->ptr;
430
					const unsigned char *src = data;
431
					unsigned int ref_size = ref_top - ref;
432
					if (entry->val != val)
433
						continue;
434
					/* ref_size is the longest possible match that we could make
435
					 * here. If ref_size <= msize, then we know that we cannot
436
					 * match more bytes with this location that we have already
437
					 * matched.
438
					 */
439
					if (ref_size > top - src)
440
						ref_size = top - src;
441
					if (ref_size <= msize)
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
442
						break;
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
443
					/* See how many bytes actually match at this location. */
444
					while (ref_size-- && *src++ == *ref)
445
						ref++;
446
					if (msize < ref - entry->ptr) {
447
						/* this is our best match so far */
448
						msize = ref - entry->ptr;
449
						mindex = index;
0.23.38 by John Arbash Meinel
fix the local offset problem in a slightly different way.
450
						moff = entry->ptr - ref_data;
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
451
						if (msize >= 4096) /* good enough */
452
							break;
453
					}
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
454
				}
455
			}
456
		}
457
458
		if (msize < 4) {
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
459
			/* The best match right now is less than 4 bytes long. So just add
460
			 * the current byte to the insert instruction. Increment the insert
461
			 * counter, and copy the byte of data into the output buffer.
462
			 */
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
463
			if (!inscnt)
464
				outpos++;
465
			out[outpos++] = *data++;
466
			inscnt++;
467
			if (inscnt == 0x7f) {
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
468
				/* We have a max length insert instruction, finalize it in the
469
				 * output.
470
				 */
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
471
				out[outpos - inscnt - 1] = inscnt;
472
				inscnt = 0;
473
			}
474
			msize = 0;
475
		} else {
476
			unsigned int left;
477
			unsigned char *op;
478
479
			if (inscnt) {
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
480
				ref_data = mindex->src_buf;
0.23.38 by John Arbash Meinel
fix the local offset problem in a slightly different way.
481
				while (moff && ref_data[moff-1] == data[-1]) {
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
482
					/* we can match one byte back */
483
					msize++;
484
					moff--;
485
					data--;
486
					outpos--;
487
					if (--inscnt)
488
						continue;
489
					outpos--;  /* remove count slot */
490
					inscnt--;  /* make it -1 */
491
					break;
492
				}
493
				out[outpos - inscnt - 1] = inscnt;
494
				inscnt = 0;
495
			}
496
497
			/* A copy op is currently limited to 64KB (pack v2) */
498
			left = (msize < 0x10000) ? 0 : (msize - 0x10000);
499
			msize -= left;
500
501
			op = out + outpos++;
502
			i = 0x80;
503
0.23.38 by John Arbash Meinel
fix the local offset problem in a slightly different way.
504
			/* moff is the offset in the local structure, for encoding, we need
505
			 * to push it into the global offset
506
			 */
507
			moff += mindex->agg_src_offset;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
508
			if (moff & 0x000000ff)
509
				out[outpos++] = moff >> 0,  i |= 0x01;
510
			if (moff & 0x0000ff00)
511
				out[outpos++] = moff >> 8,  i |= 0x02;
512
			if (moff & 0x00ff0000)
513
				out[outpos++] = moff >> 16, i |= 0x04;
514
			if (moff & 0xff000000)
515
				out[outpos++] = moff >> 24, i |= 0x08;
0.23.38 by John Arbash Meinel
fix the local offset problem in a slightly different way.
516
			/* Put it back into local coordinates, in case we have multiple
517
			 * copies in a row.
518
			 */
519
			moff -= mindex->agg_src_offset;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
520
521
			if (msize & 0x00ff)
522
				out[outpos++] = msize >> 0, i |= 0x10;
523
			if (msize & 0xff00)
524
				out[outpos++] = msize >> 8, i |= 0x20;
525
526
			*op = i;
527
528
			data += msize;
529
			moff += msize;
530
			msize = left;
531
532
			if (msize < 4096) {
533
				int j;
534
				val = 0;
535
				for (j = -RABIN_WINDOW; j < 0; j++)
536
					val = ((val << 8) | data[j])
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
537
						  ^ T[val >> RABIN_SHIFT];
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
538
			}
539
		}
540
541
		if (outpos >= outsize - MAX_OP_SIZE) {
542
			void *tmp = out;
543
			outsize = outsize * 3 / 2;
544
			if (max_size && outsize >= max_size)
545
				outsize = max_size + MAX_OP_SIZE + 1;
546
			if (max_size && outpos > max_size)
547
				break;
548
			out = realloc(out, outsize);
549
			if (!out) {
550
				free(tmp);
551
				return NULL;
552
			}
553
		}
554
	}
555
556
	if (inscnt)
557
		out[outpos - inscnt - 1] = inscnt;
558
559
	if (max_size && outpos > max_size) {
560
		free(out);
561
		return NULL;
562
	}
563
564
	*delta_size = outpos;
565
	return out;
566
}
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
567
0.23.36 by John Arbash Meinel
Track down a memory leak in the refactored diff-delta.c code.
568
/* vim: noet ts=4 sw=4 sts=4
569
 */