/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
1
/*
2
 * diff-delta.c: generate a delta between two buffers
3
 *
4
 * This code was greatly inspired by parts of LibXDiff from Davide Libenzi
5
 * http://www.xmailserver.org/xdiff-lib.html
6
 *
7
 * Rewritten for GIT by Nicolas Pitre <nico@cam.org>, (C) 2005-2007
8
 *
9
 * This code is free software; you can redistribute it and/or modify
10
 * it under the terms of the GNU General Public License version 2 as
11
 * published by the Free Software Foundation.
12
 */
13
14
#include "delta.h"
0.23.5 by John Arbash Meinel
Minor changes to get diff-delta.c and patch-delta.c to compile.
15
#include <assert.h>
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
16
17
/* maximum hash entry list for the same hash bucket */
18
#define HASH_LIMIT 64
19
20
#define RABIN_SHIFT 23
21
#define RABIN_WINDOW 16
22
23
static const unsigned int T[256] = {
24
	0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344,
25
	0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259,
26
	0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85,
27
	0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2,
28
	0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a,
29
	0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db,
30
	0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753,
31
	0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964,
32
	0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8,
33
	0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5,
34
	0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81,
35
	0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6,
36
	0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e,
37
	0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77,
38
	0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff,
39
	0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8,
40
	0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc,
41
	0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1,
42
	0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d,
43
	0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a,
44
	0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2,
45
	0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02,
46
	0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a,
47
	0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd,
48
	0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61,
49
	0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c,
50
	0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08,
51
	0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f,
52
	0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7,
53
	0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe,
54
	0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76,
55
	0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141,
56
	0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65,
57
	0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78,
58
	0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4,
59
	0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93,
60
	0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b,
61
	0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa,
62
	0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872,
63
	0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645,
64
	0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99,
65
	0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84,
66
	0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811
67
};
68
69
static const unsigned int U[256] = {
70
	0x00000000, 0x7eb5200d, 0x5633f4cb, 0x2886d4c6, 0x073e5d47, 0x798b7d4a,
71
	0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48,
72
	0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511,
73
	0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d,
74
	0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8,
75
	0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe,
76
	0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb,
77
	0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937,
78
	0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e,
79
	0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c,
80
	0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d,
81
	0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1,
82
	0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4,
83
	0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa,
84
	0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef,
85
	0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263,
86
	0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302,
87
	0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000,
88
	0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59,
89
	0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5,
90
	0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90,
91
	0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7,
92
	0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2,
93
	0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e,
94
	0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467,
95
	0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765,
96
	0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604,
97
	0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88,
98
	0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd,
99
	0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3,
100
	0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996,
101
	0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a,
102
	0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b,
103
	0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609,
104
	0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50,
105
	0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc,
106
	0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99,
107
	0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf,
108
	0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa,
109
	0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176,
110
	0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f,
111
	0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d,
112
	0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a
113
};
114
0.23.41 by John Arbash Meinel
Start moving the information about source buffers into the actual index_entry.
115
struct source_info {
116
	const void *src_buf; /* Pointer to the beginning of source data */
117
	unsigned long src_size; /* Total length of source data */
118
	unsigned long agg_src_offset; /* Start of source data as part of the
119
									 aggregate source */
120
};
121
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
122
struct index_entry {
123
	const unsigned char *ptr;
0.23.41 by John Arbash Meinel
Start moving the information about source buffers into the actual index_entry.
124
	const struct source_info *src;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
125
	unsigned int val;
126
};
127
128
struct unpacked_index_entry {
129
	struct index_entry entry;
130
	struct unpacked_index_entry *next;
131
};
132
133
struct delta_index {
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
134
	unsigned long memsize; /* Total bytes pointed to by this index */
0.23.41 by John Arbash Meinel
Start moving the information about source buffers into the actual index_entry.
135
	struct source_info src; /* Information about the referenced source */
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
136
	unsigned int hash_mask; /* val & hash_mask gives the hash index for a given
137
							   entry */
0.23.41 by John Arbash Meinel
Start moving the information about source buffers into the actual index_entry.
138
	unsigned int num_entries; /* The total number of entries in this index */
0.23.5 by John Arbash Meinel
Minor changes to get diff-delta.c and patch-delta.c to compile.
139
	struct index_entry *hash[];
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
140
};
141
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
142
static unsigned int
143
limit_hash_buckets(struct unpacked_index_entry **hash,
144
				   unsigned int *hash_count, unsigned int hsize,
145
				   unsigned int entries)
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
146
{
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
147
	struct unpacked_index_entry *entry;
148
	unsigned int i;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
149
	/*
150
	 * Determine a limit on the number of entries in the same hash
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
151
	 * bucket.	This guards us against pathological data sets causing
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
152
	 * really bad hash distribution with most entries in the same hash
153
	 * bucket that would bring us to O(m*n) computing costs (m and n
154
	 * corresponding to reference and target buffer sizes).
155
	 *
156
	 * Make sure none of the hash buckets has more entries than
157
	 * we're willing to test.  Otherwise we cull the entry list
158
	 * uniformly to still preserve a good repartition across
159
	 * the reference buffer.
160
	 */
161
	for (i = 0; i < hsize; i++) {
162
		int acc;
163
164
		if (hash_count[i] <= HASH_LIMIT)
165
			continue;
166
167
		/* We leave exactly HASH_LIMIT entries in the bucket */
168
		entries -= hash_count[i] - HASH_LIMIT;
169
170
		entry = hash[i];
171
		acc = 0;
172
173
		/*
174
		 * Assume that this loop is gone through exactly
175
		 * HASH_LIMIT times and is entered and left with
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
176
		 * acc==0.	So the first statement in the loop
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
177
		 * contributes (hash_count[i]-HASH_LIMIT)*HASH_LIMIT
178
		 * to the accumulator, and the inner loop consequently
179
		 * is run (hash_count[i]-HASH_LIMIT) times, removing
180
		 * one element from the list each time.  Since acc
181
		 * balances out to 0 at the final run, the inner loop
182
		 * body can't be left with entry==NULL.  So we indeed
183
		 * encounter entry==NULL in the outer loop only.
184
		 */
185
		do {
186
			acc += hash_count[i] - HASH_LIMIT;
187
			if (acc > 0) {
188
				struct unpacked_index_entry *keep = entry;
189
				do {
190
					entry = entry->next;
191
					acc -= HASH_LIMIT;
192
				} while (acc > 0);
193
				keep->next = entry->next;
194
			}
195
			entry = entry->next;
196
		} while (entry);
197
	}
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
198
	return entries;
199
}
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
200
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
201
static struct delta_index *
202
pack_delta_index(struct unpacked_index_entry **hash, unsigned int hsize,
203
				 unsigned int entries)
204
{
205
	unsigned int i, memsize;
206
	struct unpacked_index_entry *entry;
207
	struct delta_index *index;
208
	struct index_entry *packed_entry, **packed_hash;
209
	void *mem;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
210
	/*
211
	 * Now create the packed index in array form
212
	 * rather than linked lists.
213
	 */
214
	memsize = sizeof(*index)
215
		+ sizeof(*packed_hash) * (hsize+1)
216
		+ sizeof(*packed_entry) * entries;
217
	mem = malloc(memsize);
218
	if (!mem) {
219
		return NULL;
220
	}
221
222
	index = mem;
223
	index->memsize = memsize;
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
224
	index->hash_mask = hsize - 1;
0.23.41 by John Arbash Meinel
Start moving the information about source buffers into the actual index_entry.
225
	index->num_entries = entries;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
226
227
	mem = index->hash;
228
	packed_hash = mem;
229
	mem = packed_hash + (hsize+1);
230
	packed_entry = mem;
231
232
	for (i = 0; i < hsize; i++) {
233
		/*
234
		 * Coalesce all entries belonging to one linked list
235
		 * into consecutive array entries.
236
		 */
237
		packed_hash[i] = packed_entry;
0.23.41 by John Arbash Meinel
Start moving the information about source buffers into the actual index_entry.
238
		for (entry = hash[i]; entry; entry = entry->next, ++packed_entry) {
239
			*packed_entry = entry->entry;
240
			packed_entry->src = &index->src;
241
		}
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
242
	}
243
244
	/* Sentinel value to indicate the length of the last hash bucket */
245
	packed_hash[hsize] = packed_entry;
246
247
	assert(packed_entry - (struct index_entry *)mem == entries);
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
248
	return index;
249
}
250
251
252
struct delta_index * create_delta_index(const void *buf, unsigned long bufsize,
253
										unsigned long agg_src_offset)
254
{
255
	unsigned int i, hsize, hmask, entries, prev_val, *hash_count;
256
	const unsigned char *data, *buffer = buf;
257
	struct delta_index *index;
258
	struct unpacked_index_entry *entry, **hash;
259
	void *mem;
260
	unsigned long memsize;
261
262
	if (!buf || !bufsize)
263
		return NULL;
264
265
	/* Determine index hash size.  Note that indexing skips the
266
	   first byte to allow for optimizing the Rabin's polynomial
267
	   initialization in create_delta(). */
268
	entries = (bufsize - 1)  / RABIN_WINDOW;
269
	hsize = entries / 4;
270
	for (i = 4; (1u << i) < hsize && i < 31; i++);
271
	hsize = 1 << i;
272
	hmask = hsize - 1;
273
274
	/* allocate lookup index */
275
	memsize = sizeof(*hash) * hsize +
276
		  sizeof(*entry) * entries;
277
	mem = malloc(memsize);
278
	if (!mem)
279
		return NULL;
280
	hash = mem;
281
	mem = hash + hsize;
282
	entry = mem;
283
284
	memset(hash, 0, hsize * sizeof(*hash));
285
286
	/* allocate an array to count hash entries */
287
	hash_count = calloc(hsize, sizeof(*hash_count));
288
	if (!hash_count) {
289
		free(hash);
290
		return NULL;
291
	}
292
293
	/* then populate the index */
294
	prev_val = ~0;
295
	for (data = buffer + entries * RABIN_WINDOW - RABIN_WINDOW;
296
		 data >= buffer;
297
		 data -= RABIN_WINDOW) {
298
		unsigned int val = 0;
299
		for (i = 1; i <= RABIN_WINDOW; i++)
300
			val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
301
		if (val == prev_val) {
302
			/* keep the lowest of consecutive identical blocks */
303
			entry[-1].entry.ptr = data + RABIN_WINDOW;
304
			--entries;
305
		} else {
306
			prev_val = val;
307
			i = val & hmask;
308
			entry->entry.ptr = data + RABIN_WINDOW;
309
			entry->entry.val = val;
310
			entry->next = hash[i];
311
			hash[i] = entry++;
312
			hash_count[i]++;
313
		}
314
	}
315
316
	entries = limit_hash_buckets(hash, hash_count, hsize, entries);
317
	free(hash_count);
318
	index = pack_delta_index(hash, hsize, entries);
0.23.36 by John Arbash Meinel
Track down a memory leak in the refactored diff-delta.c code.
319
	free(hash);
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
320
	if (!index) {
321
		return NULL;
322
	}
0.23.41 by John Arbash Meinel
Start moving the information about source buffers into the actual index_entry.
323
	index->src.src_buf = buf;
324
	index->src.src_size = bufsize;
325
	index->src.agg_src_offset = agg_src_offset;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
326
	return index;
327
}
328
329
void free_delta_index(struct delta_index *index)
330
{
331
	free(index);
332
}
333
334
unsigned long sizeof_delta_index(struct delta_index *index)
335
{
336
	if (index)
337
		return index->memsize;
338
	else
339
		return 0;
340
}
341
342
/*
343
 * The maximum size for any opcode sequence, including the initial header
344
 * plus Rabin window plus biggest copy.
345
 */
346
#define MAX_OP_SIZE	(5 + 5 + 1 + RABIN_WINDOW + 7)
347
348
void *
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
349
create_delta(struct delta_index **indexes,
350
			 unsigned int num_indexes,
351
			 const void *trg_buf, unsigned long trg_size,
352
			 unsigned long *delta_size, unsigned long max_size)
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
353
{
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
354
	unsigned int i, j, outpos, outsize, moff, msize, val;
0.23.41 by John Arbash Meinel
Start moving the information about source buffers into the actual index_entry.
355
	const struct delta_index *index;
356
	const struct source_info *msource;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
357
	int inscnt;
358
	const unsigned char *ref_data, *ref_top, *data, *top;
359
	unsigned char *out;
360
361
	if (!trg_buf || !trg_size)
362
		return NULL;
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
363
	if (num_indexes == 0)
364
		return NULL;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
365
366
	outpos = 0;
367
	outsize = 8192;
368
	if (max_size && outsize >= max_size)
369
		outsize = max_size + MAX_OP_SIZE + 1;
370
	out = malloc(outsize);
371
	if (!out)
372
		return NULL;
373
374
	/* store reference buffer size */
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
375
	i = 0;
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
376
	index = indexes[0];
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
377
	for (j = 0; j < num_indexes; ++j) {
378
		index = indexes[j];
0.23.41 by John Arbash Meinel
Start moving the information about source buffers into the actual index_entry.
379
		i += index->src.src_size;
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
380
	}
0.23.41 by John Arbash Meinel
Start moving the information about source buffers into the actual index_entry.
381
	assert(i <= index->src.src_size + index->src.agg_src_offset);
382
	i = index->src.src_size + index->src.agg_src_offset;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
383
	while (i >= 0x80) {
384
		out[outpos++] = i | 0x80;
385
		i >>= 7;
386
	}
387
	out[outpos++] = i;
388
389
	/* store target buffer size */
390
	i = trg_size;
391
	while (i >= 0x80) {
392
		out[outpos++] = i | 0x80;
393
		i >>= 7;
394
	}
395
	out[outpos++] = i;
396
397
	data = trg_buf;
398
	top = (const unsigned char *) trg_buf + trg_size;
399
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
400
	/* Start the matching by filling out with a simple 'insert' instruction, of
401
	 * the first RABIN_WINDOW bytes of the input.
402
	 */
403
	outpos++; /* leave a byte for the insert command */
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
404
	val = 0;
405
	for (i = 0; i < RABIN_WINDOW && data < top; i++, data++) {
406
		out[outpos++] = *data;
407
		val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
408
	}
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
409
	/* we are now setup with an insert of 'i' bytes and val contains the RABIN
410
	 * hash for those bytes, and data points to the RABIN_WINDOW+1 byte of
411
	 * input.
412
	 */
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
413
	inscnt = i;
414
415
	moff = 0;
416
	msize = 0;
0.23.41 by John Arbash Meinel
Start moving the information about source buffers into the actual index_entry.
417
	msource = NULL;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
418
	while (data < top) {
419
		if (msize < 4096) {
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
420
			/* we don't have a 'worthy enough' match yet, so let's look for
421
			 * one.
422
			 */
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
423
			struct index_entry *entry;
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
424
			/* Shift the window by one byte. */
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
425
			val ^= U[data[-RABIN_WINDOW]];
426
			val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
427
			for (j = 0; j < num_indexes; ++j) {
428
				index = indexes[j];
429
				i = val & index->hash_mask;
0.23.31 by John Arbash Meinel
Add a bit of comments about things to do.
430
				/* TODO: When using multiple indexes like this, the hash tables
431
				 *		 mapping val => index_entry become less efficient.
432
				 *		 You end up getting a lot more collisions in the hash,
433
				 *		 which doesn't actually lead to a entry->val match.
434
				 */
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
435
				for (entry = index->hash[i]; entry < index->hash[i+1];
436
					 entry++) {
0.23.41 by John Arbash Meinel
Start moving the information about source buffers into the actual index_entry.
437
					const unsigned char *ref;
438
					const unsigned char *src;
439
					unsigned int ref_size;
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
440
					if (entry->val != val)
441
						continue;
0.23.41 by John Arbash Meinel
Start moving the information about source buffers into the actual index_entry.
442
					ref = entry->ptr;
443
					src = data;
444
					ref_data = entry->src->src_buf;
445
					ref_top = ref_data + entry->src->src_size;
446
					ref_size = ref_top - ref;
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
447
					/* ref_size is the longest possible match that we could make
448
					 * here. If ref_size <= msize, then we know that we cannot
449
					 * match more bytes with this location that we have already
450
					 * matched.
451
					 */
452
					if (ref_size > top - src)
453
						ref_size = top - src;
454
					if (ref_size <= msize)
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
455
						break;
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
456
					/* See how many bytes actually match at this location. */
457
					while (ref_size-- && *src++ == *ref)
458
						ref++;
459
					if (msize < ref - entry->ptr) {
460
						/* this is our best match so far */
461
						msize = ref - entry->ptr;
0.23.41 by John Arbash Meinel
Start moving the information about source buffers into the actual index_entry.
462
						msource = entry->src;
0.23.38 by John Arbash Meinel
fix the local offset problem in a slightly different way.
463
						moff = entry->ptr - ref_data;
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
464
						if (msize >= 4096) /* good enough */
465
							break;
466
					}
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
467
				}
468
			}
469
		}
470
471
		if (msize < 4) {
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
472
			/* The best match right now is less than 4 bytes long. So just add
473
			 * the current byte to the insert instruction. Increment the insert
474
			 * counter, and copy the byte of data into the output buffer.
475
			 */
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
476
			if (!inscnt)
477
				outpos++;
478
			out[outpos++] = *data++;
479
			inscnt++;
480
			if (inscnt == 0x7f) {
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
481
				/* We have a max length insert instruction, finalize it in the
482
				 * output.
483
				 */
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
484
				out[outpos - inscnt - 1] = inscnt;
485
				inscnt = 0;
486
			}
487
			msize = 0;
488
		} else {
489
			unsigned int left;
490
			unsigned char *op;
491
492
			if (inscnt) {
0.23.41 by John Arbash Meinel
Start moving the information about source buffers into the actual index_entry.
493
				ref_data = msource->src_buf;
0.23.38 by John Arbash Meinel
fix the local offset problem in a slightly different way.
494
				while (moff && ref_data[moff-1] == data[-1]) {
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
495
					/* we can match one byte back */
496
					msize++;
497
					moff--;
498
					data--;
499
					outpos--;
500
					if (--inscnt)
501
						continue;
502
					outpos--;  /* remove count slot */
503
					inscnt--;  /* make it -1 */
504
					break;
505
				}
506
				out[outpos - inscnt - 1] = inscnt;
507
				inscnt = 0;
508
			}
509
510
			/* A copy op is currently limited to 64KB (pack v2) */
511
			left = (msize < 0x10000) ? 0 : (msize - 0x10000);
512
			msize -= left;
513
514
			op = out + outpos++;
515
			i = 0x80;
516
0.23.38 by John Arbash Meinel
fix the local offset problem in a slightly different way.
517
			/* moff is the offset in the local structure, for encoding, we need
518
			 * to push it into the global offset
519
			 */
0.23.41 by John Arbash Meinel
Start moving the information about source buffers into the actual index_entry.
520
			moff += msource->agg_src_offset;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
521
			if (moff & 0x000000ff)
522
				out[outpos++] = moff >> 0,  i |= 0x01;
523
			if (moff & 0x0000ff00)
524
				out[outpos++] = moff >> 8,  i |= 0x02;
525
			if (moff & 0x00ff0000)
526
				out[outpos++] = moff >> 16, i |= 0x04;
527
			if (moff & 0xff000000)
528
				out[outpos++] = moff >> 24, i |= 0x08;
0.23.38 by John Arbash Meinel
fix the local offset problem in a slightly different way.
529
			/* Put it back into local coordinates, in case we have multiple
530
			 * copies in a row.
531
			 */
0.23.41 by John Arbash Meinel
Start moving the information about source buffers into the actual index_entry.
532
			moff -= msource->agg_src_offset;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
533
534
			if (msize & 0x00ff)
535
				out[outpos++] = msize >> 0, i |= 0x10;
536
			if (msize & 0xff00)
537
				out[outpos++] = msize >> 8, i |= 0x20;
538
539
			*op = i;
540
541
			data += msize;
542
			moff += msize;
543
			msize = left;
544
545
			if (msize < 4096) {
546
				int j;
547
				val = 0;
548
				for (j = -RABIN_WINDOW; j < 0; j++)
549
					val = ((val << 8) | data[j])
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
550
						  ^ T[val >> RABIN_SHIFT];
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
551
			}
552
		}
553
554
		if (outpos >= outsize - MAX_OP_SIZE) {
555
			void *tmp = out;
556
			outsize = outsize * 3 / 2;
557
			if (max_size && outsize >= max_size)
558
				outsize = max_size + MAX_OP_SIZE + 1;
559
			if (max_size && outpos > max_size)
560
				break;
561
			out = realloc(out, outsize);
562
			if (!out) {
563
				free(tmp);
564
				return NULL;
565
			}
566
		}
567
	}
568
569
	if (inscnt)
570
		out[outpos - inscnt - 1] = inscnt;
571
572
	if (max_size && outpos > max_size) {
573
		free(out);
574
		return NULL;
575
	}
576
577
	*delta_size = outpos;
578
	return out;
579
}
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
580
0.23.36 by John Arbash Meinel
Track down a memory leak in the refactored diff-delta.c code.
581
/* vim: noet ts=4 sw=4 sts=4
582
 */