/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
1
/*
2
 * diff-delta.c: generate a delta between two buffers
3
 *
4
 * This code was greatly inspired by parts of LibXDiff from Davide Libenzi
5
 * http://www.xmailserver.org/xdiff-lib.html
6
 *
7
 * Rewritten for GIT by Nicolas Pitre <nico@cam.org>, (C) 2005-2007
8
 *
9
 * This code is free software; you can redistribute it and/or modify
10
 * it under the terms of the GNU General Public License version 2 as
11
 * published by the Free Software Foundation.
12
 */
13
14
#include "delta.h"
0.23.5 by John Arbash Meinel
Minor changes to get diff-delta.c and patch-delta.c to compile.
15
#include <assert.h>
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
16
17
/* maximum hash entry list for the same hash bucket */
18
#define HASH_LIMIT 64
19
20
#define RABIN_SHIFT 23
21
#define RABIN_WINDOW 16
22
23
static const unsigned int T[256] = {
24
	0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344,
25
	0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259,
26
	0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85,
27
	0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2,
28
	0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a,
29
	0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db,
30
	0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753,
31
	0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964,
32
	0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8,
33
	0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5,
34
	0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81,
35
	0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6,
36
	0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e,
37
	0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77,
38
	0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff,
39
	0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8,
40
	0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc,
41
	0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1,
42
	0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d,
43
	0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a,
44
	0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2,
45
	0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02,
46
	0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a,
47
	0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd,
48
	0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61,
49
	0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c,
50
	0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08,
51
	0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f,
52
	0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7,
53
	0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe,
54
	0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76,
55
	0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141,
56
	0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65,
57
	0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78,
58
	0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4,
59
	0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93,
60
	0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b,
61
	0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa,
62
	0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872,
63
	0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645,
64
	0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99,
65
	0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84,
66
	0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811
67
};
68
69
static const unsigned int U[256] = {
70
	0x00000000, 0x7eb5200d, 0x5633f4cb, 0x2886d4c6, 0x073e5d47, 0x798b7d4a,
71
	0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48,
72
	0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511,
73
	0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d,
74
	0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8,
75
	0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe,
76
	0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb,
77
	0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937,
78
	0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e,
79
	0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c,
80
	0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d,
81
	0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1,
82
	0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4,
83
	0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa,
84
	0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef,
85
	0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263,
86
	0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302,
87
	0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000,
88
	0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59,
89
	0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5,
90
	0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90,
91
	0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7,
92
	0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2,
93
	0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e,
94
	0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467,
95
	0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765,
96
	0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604,
97
	0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88,
98
	0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd,
99
	0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3,
100
	0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996,
101
	0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a,
102
	0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b,
103
	0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609,
104
	0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50,
105
	0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc,
106
	0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99,
107
	0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf,
108
	0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa,
109
	0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176,
110
	0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f,
111
	0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d,
112
	0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a
113
};
114
115
struct index_entry {
116
	const unsigned char *ptr;
0.23.41 by John Arbash Meinel
Start moving the information about source buffers into the actual index_entry.
117
	const struct source_info *src;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
118
	unsigned int val;
119
};
120
121
struct unpacked_index_entry {
122
	struct index_entry entry;
123
	struct unpacked_index_entry *next;
124
};
125
126
struct delta_index {
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
127
	unsigned long memsize; /* Total bytes pointed to by this index */
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
128
	const struct source_info *last_src; /* Information about the referenced source */
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
129
	unsigned int hash_mask; /* val & hash_mask gives the hash index for a given
130
							   entry */
0.23.41 by John Arbash Meinel
Start moving the information about source buffers into the actual index_entry.
131
	unsigned int num_entries; /* The total number of entries in this index */
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
132
	struct index_entry *last_entry; /* Pointer to the last valid entry */
0.23.5 by John Arbash Meinel
Minor changes to get diff-delta.c and patch-delta.c to compile.
133
	struct index_entry *hash[];
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
134
};
135
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
136
static unsigned int
137
limit_hash_buckets(struct unpacked_index_entry **hash,
138
				   unsigned int *hash_count, unsigned int hsize,
139
				   unsigned int entries)
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
140
{
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
141
	struct unpacked_index_entry *entry;
142
	unsigned int i;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
143
	/*
144
	 * Determine a limit on the number of entries in the same hash
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
145
	 * bucket.	This guards us against pathological data sets causing
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
146
	 * really bad hash distribution with most entries in the same hash
147
	 * bucket that would bring us to O(m*n) computing costs (m and n
148
	 * corresponding to reference and target buffer sizes).
149
	 *
150
	 * Make sure none of the hash buckets has more entries than
151
	 * we're willing to test.  Otherwise we cull the entry list
152
	 * uniformly to still preserve a good repartition across
153
	 * the reference buffer.
154
	 */
155
	for (i = 0; i < hsize; i++) {
156
		int acc;
157
158
		if (hash_count[i] <= HASH_LIMIT)
159
			continue;
160
161
		/* We leave exactly HASH_LIMIT entries in the bucket */
162
		entries -= hash_count[i] - HASH_LIMIT;
163
164
		entry = hash[i];
165
		acc = 0;
166
167
		/*
168
		 * Assume that this loop is gone through exactly
169
		 * HASH_LIMIT times and is entered and left with
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
170
		 * acc==0.	So the first statement in the loop
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
171
		 * contributes (hash_count[i]-HASH_LIMIT)*HASH_LIMIT
172
		 * to the accumulator, and the inner loop consequently
173
		 * is run (hash_count[i]-HASH_LIMIT) times, removing
174
		 * one element from the list each time.  Since acc
175
		 * balances out to 0 at the final run, the inner loop
176
		 * body can't be left with entry==NULL.  So we indeed
177
		 * encounter entry==NULL in the outer loop only.
178
		 */
179
		do {
180
			acc += hash_count[i] - HASH_LIMIT;
181
			if (acc > 0) {
182
				struct unpacked_index_entry *keep = entry;
183
				do {
184
					entry = entry->next;
185
					acc -= HASH_LIMIT;
186
				} while (acc > 0);
187
				keep->next = entry->next;
188
			}
189
			entry = entry->next;
190
		} while (entry);
191
	}
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
192
	return entries;
193
}
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
194
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
195
static struct delta_index *
196
pack_delta_index(struct unpacked_index_entry **hash, unsigned int hsize,
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
197
				 unsigned int num_entries)
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
198
{
199
	unsigned int i, memsize;
200
	struct unpacked_index_entry *entry;
201
	struct delta_index *index;
202
	struct index_entry *packed_entry, **packed_hash;
203
	void *mem;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
204
	/*
205
	 * Now create the packed index in array form
206
	 * rather than linked lists.
207
	 */
208
	memsize = sizeof(*index)
209
		+ sizeof(*packed_hash) * (hsize+1)
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
210
		+ sizeof(*packed_entry) * num_entries;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
211
	mem = malloc(memsize);
212
	if (!mem) {
213
		return NULL;
214
	}
215
216
	index = mem;
217
	index->memsize = memsize;
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
218
	index->hash_mask = hsize - 1;
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
219
	index->num_entries = num_entries;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
220
221
	mem = index->hash;
222
	packed_hash = mem;
223
	mem = packed_hash + (hsize+1);
224
	packed_entry = mem;
225
226
	for (i = 0; i < hsize; i++) {
227
		/*
228
		 * Coalesce all entries belonging to one linked list
229
		 * into consecutive array entries.
230
		 */
231
		packed_hash[i] = packed_entry;
0.23.42 by John Arbash Meinel
Change the code around again.
232
		for (entry = hash[i]; entry; entry = entry->next)
233
			*packed_entry++ = entry->entry;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
234
	}
235
236
	/* Sentinel value to indicate the length of the last hash bucket */
237
	packed_hash[hsize] = packed_entry;
238
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
239
	assert(packed_entry - (struct index_entry *)mem == num_entries);
240
	index->last_entry = (packed_entry - 1);
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
241
	return index;
242
}
243
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
244
void include_entries_from_index(struct unpacked_index_entry **hash,
245
								unsigned int *hash_count,
246
								unsigned int hsize,
247
								struct unpacked_index_entry *entry,
248
								const struct delta_index *old)
249
{
250
	unsigned int i, hmask, masked_val;
251
	struct index_entry *old_entry;
252
253
	hmask = hsize - 1;
254
	/* Iterate backwards through the existing entries
255
	 * This is so that matches early in the file end up being the first pointer
256
	 * in the linked list.
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
257
	 * TODO: We know that all old entries are going to occur before the new
258
	 *		 entries, and that we are going to follow this with a limit and
259
	 *		 then pack step. There is probably a way we could optimize this
260
	 *		 step, so that we wouldn't have to copy everything into a new
261
	 *		 buffer, and then copy everything again into yet another buffer.
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
262
	 */
263
	for (old_entry = old->last_entry, i = 0; i < old->num_entries;
264
		 i++, old_entry--) {
265
		masked_val = old_entry->val & hmask;
266
		entry->entry = *old_entry;
267
		entry->next = hash[masked_val];
268
		hash[masked_val] = entry++;
269
		hash_count[masked_val]++;
270
	}
271
}
272
273
274
struct delta_index * create_delta_index(const struct source_info *src,
275
										const struct delta_index *old)
276
{
277
	unsigned int i, hsize, hmask, num_entries, prev_val, *hash_count;
278
	unsigned int total_num_entries;
0.23.42 by John Arbash Meinel
Change the code around again.
279
	const unsigned char *data, *buffer;
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
280
	struct delta_index *index;
281
	struct unpacked_index_entry *entry, **hash;
282
	void *mem;
283
	unsigned long memsize;
284
0.23.42 by John Arbash Meinel
Change the code around again.
285
	if (!src->buf || !src->size)
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
286
		return NULL;
0.23.42 by John Arbash Meinel
Change the code around again.
287
	buffer = src->buf;
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
288
289
	/* Determine index hash size.  Note that indexing skips the
290
	   first byte to allow for optimizing the Rabin's polynomial
291
	   initialization in create_delta(). */
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
292
	num_entries = (src->size - 1)  / RABIN_WINDOW;
293
	if (old != NULL)
294
		total_num_entries = num_entries + old->num_entries;
295
	else
296
		total_num_entries = num_entries;
297
	hsize = total_num_entries / 4;
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
298
	for (i = 4; (1u << i) < hsize && i < 31; i++);
299
	hsize = 1 << i;
300
	hmask = hsize - 1;
301
302
	/* allocate lookup index */
303
	memsize = sizeof(*hash) * hsize +
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
304
		  sizeof(*entry) * total_num_entries;
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
305
	mem = malloc(memsize);
306
	if (!mem)
307
		return NULL;
308
	hash = mem;
309
	mem = hash + hsize;
310
	entry = mem;
311
312
	memset(hash, 0, hsize * sizeof(*hash));
313
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
314
	/* allocate an array to count hash num_entries */
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
315
	hash_count = calloc(hsize, sizeof(*hash_count));
316
	if (!hash_count) {
317
		free(hash);
318
		return NULL;
319
	}
320
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
321
	/* then populate the index for the new data */
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
322
	prev_val = ~0;
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
323
	for (data = buffer + num_entries * RABIN_WINDOW - RABIN_WINDOW;
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
324
		 data >= buffer;
325
		 data -= RABIN_WINDOW) {
326
		unsigned int val = 0;
327
		for (i = 1; i <= RABIN_WINDOW; i++)
328
			val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
329
		if (val == prev_val) {
330
			/* keep the lowest of consecutive identical blocks */
331
			entry[-1].entry.ptr = data + RABIN_WINDOW;
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
332
			--num_entries;
333
			--total_num_entries;
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
334
		} else {
335
			prev_val = val;
336
			i = val & hmask;
337
			entry->entry.ptr = data + RABIN_WINDOW;
338
			entry->entry.val = val;
0.23.42 by John Arbash Meinel
Change the code around again.
339
			entry->entry.src = src;
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
340
			entry->next = hash[i];
341
			hash[i] = entry++;
342
			hash_count[i]++;
343
		}
344
	}
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
345
	/* If we have an existing delta_index, we want to bring its info into the
346
	 * new structure. We assume that the existing structure's objects all occur
347
	 * before the new source, so they get first precedence in the index.
348
	 */
349
	if (old != NULL)
350
		include_entries_from_index(hash, hash_count, hsize, entry, old);
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
351
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
352
	total_num_entries = limit_hash_buckets(hash, hash_count, hsize,
353
										   total_num_entries);
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
354
	free(hash_count);
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
355
	index = pack_delta_index(hash, hsize, total_num_entries);
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
356
	index->last_src = src;
0.23.36 by John Arbash Meinel
Track down a memory leak in the refactored diff-delta.c code.
357
	free(hash);
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
358
	if (!index) {
359
		return NULL;
360
	}
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
361
	return index;
362
}
363
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
364
struct delta_index * create_delta_index_from_delta(
365
										const struct source_info *src,
366
										const struct delta_index *old)
367
{
368
	unsigned int i, hsize, hmask, num_entries, prev_val, *hash_count;
369
	unsigned int total_num_entries;
370
	const unsigned char *data, *buffer, *top;
371
	unsigned char cmd;
372
	struct delta_index *index;
373
	struct unpacked_index_entry *entry, **hash;
374
	void *mem;
375
	unsigned long memsize;
376
377
	if (!src->buf || !src->size)
378
		return NULL;
379
	buffer = src->buf;
380
	top = buffer + src->size;
381
382
	/* Determine index hash size.  Note that indexing skips the
383
	   first byte to allow for optimizing the Rabin's polynomial
384
	   initialization in create_delta().
385
	   This computes the maximum number of entries that could be held. The
386
	   actual number will be recomputed during processing.
387
	   */
388
	 
389
	num_entries = (src->size - 1)  / RABIN_WINDOW;
390
	if (old != NULL)
391
		total_num_entries = num_entries + old->num_entries;
392
	else
393
		total_num_entries = num_entries;
394
	hsize = total_num_entries / 4;
395
	for (i = 4; (1u << i) < hsize && i < 31; i++);
396
	hsize = 1 << i;
397
	hmask = hsize - 1;
398
399
	/* allocate lookup index */
400
	memsize = sizeof(*hash) * hsize +
401
		  sizeof(*entry) * total_num_entries;
402
	mem = malloc(memsize);
403
	if (!mem)
404
		return NULL;
405
	hash = mem;
406
	mem = hash + hsize;
407
	entry = mem;
408
409
	memset(hash, 0, hsize * sizeof(*hash));
410
411
	/* allocate an array to count hash num_entries */
412
	hash_count = calloc(hsize, sizeof(*hash_count));
413
	if (!hash_count) {
414
		free(hash);
415
		return NULL;
416
	}
417
418
	/* then populate the index for the new data */
419
	/* The create_delta_index code starts at the end and works backward,
420
	 * because it is easier to update the entry pointers (you always update the
421
	 * head, rather than updating the tail).
422
	 * However, we can't walk the delta structure that way.
423
	 */
424
	prev_val = ~0;
425
	data = buffer;
426
	/* source size */
427
	get_delta_hdr_size(&data, top);
428
	/* target size */
429
	get_delta_hdr_size(&data, top);
430
	num_entries = 0; /* calculate the real number of entries */
431
	while (data < top) {
432
		cmd = *data++;
433
		if (cmd & 0x80) {
434
			/* Copy instruction, skip it */
435
			const unsigned char *start = data;
436
			if (cmd & 0x01) data++;
437
			if (cmd & 0x02) data++;
438
			if (cmd & 0x04) data++;
439
			if (cmd & 0x08) data++;
440
			if (cmd & 0x10) data++;
441
			if (cmd & 0x20) data++;
442
			if (cmd & 0x40) data++;
443
		} else if (cmd) {
444
			/* Insert instruction, we want to index these bytes */
445
			if (data + cmd > top) {
446
				/* Invalid insert, not enough bytes in the delta */
447
				break;
448
			}
449
			for (; cmd > RABIN_WINDOW; cmd -= RABIN_WINDOW,
450
									   data += RABIN_WINDOW) {
451
				unsigned int val = 0;
452
				for (i = 1; i <= RABIN_WINDOW; i++)
453
					val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
454
				if (val != prev_val) {
455
					/* Only keep the first of consecutive data */
456
					prev_val = val;
457
					i = val & hmask;
458
					entry->entry.ptr = data + RABIN_WINDOW;
459
					entry->entry.val = val;
460
					entry->entry.src = src;
461
					entry->next = NULL;
462
					/* Now we have to insert this entry at the end of the hash
463
					 * map
464
					 */
465
					if (hash[i] == NULL) {
466
						hash[i] = entry;
467
					} else {
468
						struct unpacked_index_entry *this;
469
						for (this = entry;
470
							this->next != NULL;
471
							this = this->next) { /* No action */ }
472
						this->next = entry;
473
						this = entry;
474
					}
475
					hash_count[i]++;
476
					entry++;
477
					num_entries++;
478
				}
479
			}
480
			/* Move the data pointer by whatever remainder is left */
481
			data += cmd;
482
		} else {
483
			/*
484
			 * cmd == 0 is reserved for future encoding
485
			 * extensions. In the mean time we must fail when
486
			 * encountering them (might be data corruption).
487
			 */
488
			break;
489
		}
490
	}
491
	if (data != top) {
492
		/* Something was wrong with this delta */
493
		free(hash);
494
		free(hash_count);
495
		return NULL;
496
	}
497
	if (old != NULL) {
498
		total_num_entries = num_entries + old->num_entries;
499
		include_entries_from_index(hash, hash_count, hsize, entry, old);
500
	} else {
501
		total_num_entries = num_entries;
502
	}
503
504
	total_num_entries = limit_hash_buckets(hash, hash_count, hsize,
505
										   total_num_entries);
506
	free(hash_count);
507
	index = pack_delta_index(hash, hsize, total_num_entries);
508
	index->last_src = src;
509
	free(hash);
510
	if (!index) {
511
		return NULL;
512
	}
513
	return index;
514
}
515
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
516
void free_delta_index(struct delta_index *index)
517
{
518
	free(index);
519
}
520
521
unsigned long sizeof_delta_index(struct delta_index *index)
522
{
523
	if (index)
524
		return index->memsize;
525
	else
526
		return 0;
527
}
528
529
/*
530
 * The maximum size for any opcode sequence, including the initial header
531
 * plus Rabin window plus biggest copy.
532
 */
533
#define MAX_OP_SIZE	(5 + 5 + 1 + RABIN_WINDOW + 7)
534
535
void *
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
536
create_delta(const struct delta_index *index,
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
537
			 const void *trg_buf, unsigned long trg_size,
538
			 unsigned long *delta_size, unsigned long max_size)
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
539
{
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
540
	unsigned int i, outpos, outsize, moff, msize, val;
0.23.41 by John Arbash Meinel
Start moving the information about source buffers into the actual index_entry.
541
	const struct source_info *msource;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
542
	int inscnt;
543
	const unsigned char *ref_data, *ref_top, *data, *top;
544
	unsigned char *out;
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
545
	unsigned long source_size;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
546
547
	if (!trg_buf || !trg_size)
548
		return NULL;
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
549
	if (index == NULL)
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
550
		return NULL;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
551
552
	outpos = 0;
553
	outsize = 8192;
554
	if (max_size && outsize >= max_size)
555
		outsize = max_size + MAX_OP_SIZE + 1;
556
	out = malloc(outsize);
557
	if (!out)
558
		return NULL;
559
560
	/* store reference buffer size */
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
561
	source_size = index->last_src->size + index->last_src->agg_offset;
562
	i = source_size;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
563
	while (i >= 0x80) {
564
		out[outpos++] = i | 0x80;
565
		i >>= 7;
566
	}
567
	out[outpos++] = i;
568
569
	/* store target buffer size */
570
	i = trg_size;
571
	while (i >= 0x80) {
572
		out[outpos++] = i | 0x80;
573
		i >>= 7;
574
	}
575
	out[outpos++] = i;
576
577
	data = trg_buf;
578
	top = (const unsigned char *) trg_buf + trg_size;
579
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
580
	/* Start the matching by filling out with a simple 'insert' instruction, of
581
	 * the first RABIN_WINDOW bytes of the input.
582
	 */
583
	outpos++; /* leave a byte for the insert command */
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
584
	val = 0;
585
	for (i = 0; i < RABIN_WINDOW && data < top; i++, data++) {
586
		out[outpos++] = *data;
587
		val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
588
	}
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
589
	/* we are now setup with an insert of 'i' bytes and val contains the RABIN
590
	 * hash for those bytes, and data points to the RABIN_WINDOW+1 byte of
591
	 * input.
592
	 */
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
593
	inscnt = i;
594
595
	moff = 0;
596
	msize = 0;
0.23.41 by John Arbash Meinel
Start moving the information about source buffers into the actual index_entry.
597
	msource = NULL;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
598
	while (data < top) {
599
		if (msize < 4096) {
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
600
			/* we don't have a 'worthy enough' match yet, so let's look for
601
			 * one.
602
			 */
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
603
			struct index_entry *entry;
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
604
			/* Shift the window by one byte. */
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
605
			val ^= U[data[-RABIN_WINDOW]];
606
			val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
607
			i = val & index->hash_mask;
608
			/* TODO: When using multiple indexes like this, the hash tables
609
			 *		 mapping val => index_entry become less efficient.
610
			 *		 You end up getting a lot more collisions in the hash,
611
			 *		 which doesn't actually lead to a entry->val match.
612
			 */
613
			for (entry = index->hash[i]; entry < index->hash[i+1];
614
				 entry++) {
615
				const unsigned char *ref;
616
				const unsigned char *src;
617
				unsigned int ref_size;
618
				if (entry->val != val)
619
					continue;
620
				ref = entry->ptr;
621
				src = data;
622
				ref_data = entry->src->buf;
623
				ref_top = ref_data + entry->src->size;
624
				ref_size = ref_top - ref;
625
				/* ref_size is the longest possible match that we could make
626
				 * here. If ref_size <= msize, then we know that we cannot
627
				 * match more bytes with this location that we have already
628
				 * matched.
0.23.31 by John Arbash Meinel
Add a bit of comments about things to do.
629
				 */
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
630
				if (ref_size > top - src)
631
					ref_size = top - src;
632
				if (ref_size <= msize)
633
					break;
634
				/* See how many bytes actually match at this location. */
635
				while (ref_size-- && *src++ == *ref)
636
					ref++;
637
				if (msize < ref - entry->ptr) {
638
					/* this is our best match so far */
639
					msize = ref - entry->ptr;
640
					msource = entry->src;
641
					moff = entry->ptr - ref_data;
642
					if (msize >= 4096) /* good enough */
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
643
						break;
644
				}
645
			}
646
		}
647
648
		if (msize < 4) {
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
649
			/* The best match right now is less than 4 bytes long. So just add
650
			 * the current byte to the insert instruction. Increment the insert
651
			 * counter, and copy the byte of data into the output buffer.
652
			 */
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
653
			if (!inscnt)
654
				outpos++;
655
			out[outpos++] = *data++;
656
			inscnt++;
657
			if (inscnt == 0x7f) {
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
658
				/* We have a max length insert instruction, finalize it in the
659
				 * output.
660
				 */
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
661
				out[outpos - inscnt - 1] = inscnt;
662
				inscnt = 0;
663
			}
664
			msize = 0;
665
		} else {
666
			unsigned int left;
667
			unsigned char *op;
668
669
			if (inscnt) {
0.23.42 by John Arbash Meinel
Change the code around again.
670
				ref_data = msource->buf;
0.23.38 by John Arbash Meinel
fix the local offset problem in a slightly different way.
671
				while (moff && ref_data[moff-1] == data[-1]) {
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
672
					/* we can match one byte back */
673
					msize++;
674
					moff--;
675
					data--;
676
					outpos--;
677
					if (--inscnt)
678
						continue;
679
					outpos--;  /* remove count slot */
680
					inscnt--;  /* make it -1 */
681
					break;
682
				}
683
				out[outpos - inscnt - 1] = inscnt;
684
				inscnt = 0;
685
			}
686
687
			/* A copy op is currently limited to 64KB (pack v2) */
688
			left = (msize < 0x10000) ? 0 : (msize - 0x10000);
689
			msize -= left;
690
691
			op = out + outpos++;
692
			i = 0x80;
693
0.23.38 by John Arbash Meinel
fix the local offset problem in a slightly different way.
694
			/* moff is the offset in the local structure, for encoding, we need
695
			 * to push it into the global offset
696
			 */
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
697
			assert(moff < msource->size);
0.23.42 by John Arbash Meinel
Change the code around again.
698
			moff += msource->agg_offset;
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
699
			assert(moff + msize <= source_size);
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
700
			if (moff & 0x000000ff)
701
				out[outpos++] = moff >> 0,  i |= 0x01;
702
			if (moff & 0x0000ff00)
703
				out[outpos++] = moff >> 8,  i |= 0x02;
704
			if (moff & 0x00ff0000)
705
				out[outpos++] = moff >> 16, i |= 0x04;
706
			if (moff & 0xff000000)
707
				out[outpos++] = moff >> 24, i |= 0x08;
0.23.38 by John Arbash Meinel
fix the local offset problem in a slightly different way.
708
			/* Put it back into local coordinates, in case we have multiple
709
			 * copies in a row.
710
			 */
0.23.42 by John Arbash Meinel
Change the code around again.
711
			moff -= msource->agg_offset;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
712
713
			if (msize & 0x00ff)
714
				out[outpos++] = msize >> 0, i |= 0x10;
715
			if (msize & 0xff00)
716
				out[outpos++] = msize >> 8, i |= 0x20;
717
718
			*op = i;
719
720
			data += msize;
721
			moff += msize;
722
			msize = left;
723
724
			if (msize < 4096) {
725
				int j;
726
				val = 0;
727
				for (j = -RABIN_WINDOW; j < 0; j++)
728
					val = ((val << 8) | data[j])
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
729
						  ^ T[val >> RABIN_SHIFT];
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
730
			}
731
		}
732
733
		if (outpos >= outsize - MAX_OP_SIZE) {
734
			void *tmp = out;
735
			outsize = outsize * 3 / 2;
736
			if (max_size && outsize >= max_size)
737
				outsize = max_size + MAX_OP_SIZE + 1;
738
			if (max_size && outpos > max_size)
739
				break;
740
			out = realloc(out, outsize);
741
			if (!out) {
742
				free(tmp);
743
				return NULL;
744
			}
745
		}
746
	}
747
748
	if (inscnt)
749
		out[outpos - inscnt - 1] = inscnt;
750
751
	if (max_size && outpos > max_size) {
752
		free(out);
753
		return NULL;
754
	}
755
756
	*delta_size = outpos;
757
	return out;
758
}
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
759
0.23.36 by John Arbash Meinel
Track down a memory leak in the refactored diff-delta.c code.
760
/* vim: noet ts=4 sw=4 sts=4
761
 */