/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
1
/*
2
 * diff-delta.c: generate a delta between two buffers
3
 *
4
 * This code was greatly inspired by parts of LibXDiff from Davide Libenzi
5
 * http://www.xmailserver.org/xdiff-lib.html
6
 *
7
 * Rewritten for GIT by Nicolas Pitre <nico@cam.org>, (C) 2005-2007
8
 *
9
 * This code is free software; you can redistribute it and/or modify
10
 * it under the terms of the GNU General Public License version 2 as
11
 * published by the Free Software Foundation.
12
 */
13
14
#include "delta.h"
0.23.5 by John Arbash Meinel
Minor changes to get diff-delta.c and patch-delta.c to compile.
15
#include <assert.h>
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
16
17
/* maximum hash entry list for the same hash bucket */
18
#define HASH_LIMIT 64
19
20
#define RABIN_SHIFT 23
21
#define RABIN_WINDOW 16
22
23
static const unsigned int T[256] = {
24
	0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344,
25
	0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259,
26
	0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85,
27
	0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2,
28
	0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a,
29
	0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db,
30
	0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753,
31
	0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964,
32
	0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8,
33
	0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5,
34
	0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81,
35
	0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6,
36
	0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e,
37
	0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77,
38
	0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff,
39
	0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8,
40
	0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc,
41
	0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1,
42
	0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d,
43
	0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a,
44
	0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2,
45
	0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02,
46
	0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a,
47
	0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd,
48
	0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61,
49
	0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c,
50
	0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08,
51
	0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f,
52
	0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7,
53
	0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe,
54
	0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76,
55
	0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141,
56
	0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65,
57
	0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78,
58
	0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4,
59
	0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93,
60
	0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b,
61
	0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa,
62
	0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872,
63
	0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645,
64
	0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99,
65
	0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84,
66
	0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811
67
};
68
69
static const unsigned int U[256] = {
70
	0x00000000, 0x7eb5200d, 0x5633f4cb, 0x2886d4c6, 0x073e5d47, 0x798b7d4a,
71
	0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48,
72
	0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511,
73
	0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d,
74
	0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8,
75
	0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe,
76
	0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb,
77
	0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937,
78
	0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e,
79
	0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c,
80
	0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d,
81
	0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1,
82
	0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4,
83
	0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa,
84
	0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef,
85
	0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263,
86
	0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302,
87
	0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000,
88
	0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59,
89
	0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5,
90
	0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90,
91
	0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7,
92
	0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2,
93
	0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e,
94
	0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467,
95
	0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765,
96
	0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604,
97
	0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88,
98
	0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd,
99
	0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3,
100
	0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996,
101
	0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a,
102
	0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b,
103
	0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609,
104
	0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50,
105
	0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc,
106
	0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99,
107
	0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf,
108
	0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa,
109
	0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176,
110
	0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f,
111
	0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d,
112
	0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a
113
};
114
115
struct index_entry {
116
	const unsigned char *ptr;
0.23.41 by John Arbash Meinel
Start moving the information about source buffers into the actual index_entry.
117
	const struct source_info *src;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
118
	unsigned int val;
119
};
120
121
struct unpacked_index_entry {
122
	struct index_entry entry;
123
	struct unpacked_index_entry *next;
124
};
125
126
struct delta_index {
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
127
	unsigned long memsize; /* Total bytes pointed to by this index */
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
128
	const struct source_info *last_src; /* Information about the referenced source */
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
129
	unsigned int hash_mask; /* val & hash_mask gives the hash index for a given
130
							   entry */
0.23.41 by John Arbash Meinel
Start moving the information about source buffers into the actual index_entry.
131
	unsigned int num_entries; /* The total number of entries in this index */
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
132
	struct index_entry *last_entry; /* Pointer to the last valid entry */
0.23.5 by John Arbash Meinel
Minor changes to get diff-delta.c and patch-delta.c to compile.
133
	struct index_entry *hash[];
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
134
};
135
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
136
static unsigned int
137
limit_hash_buckets(struct unpacked_index_entry **hash,
138
				   unsigned int *hash_count, unsigned int hsize,
139
				   unsigned int entries)
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
140
{
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
141
	struct unpacked_index_entry *entry;
142
	unsigned int i;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
143
	/*
144
	 * Determine a limit on the number of entries in the same hash
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
145
	 * bucket.	This guards us against pathological data sets causing
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
146
	 * really bad hash distribution with most entries in the same hash
147
	 * bucket that would bring us to O(m*n) computing costs (m and n
148
	 * corresponding to reference and target buffer sizes).
149
	 *
150
	 * Make sure none of the hash buckets has more entries than
151
	 * we're willing to test.  Otherwise we cull the entry list
152
	 * uniformly to still preserve a good repartition across
153
	 * the reference buffer.
154
	 */
155
	for (i = 0; i < hsize; i++) {
156
		int acc;
157
158
		if (hash_count[i] <= HASH_LIMIT)
159
			continue;
160
161
		/* We leave exactly HASH_LIMIT entries in the bucket */
162
		entries -= hash_count[i] - HASH_LIMIT;
163
164
		entry = hash[i];
165
		acc = 0;
166
167
		/*
168
		 * Assume that this loop is gone through exactly
169
		 * HASH_LIMIT times and is entered and left with
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
170
		 * acc==0.	So the first statement in the loop
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
171
		 * contributes (hash_count[i]-HASH_LIMIT)*HASH_LIMIT
172
		 * to the accumulator, and the inner loop consequently
173
		 * is run (hash_count[i]-HASH_LIMIT) times, removing
174
		 * one element from the list each time.  Since acc
175
		 * balances out to 0 at the final run, the inner loop
176
		 * body can't be left with entry==NULL.  So we indeed
177
		 * encounter entry==NULL in the outer loop only.
178
		 */
179
		do {
180
			acc += hash_count[i] - HASH_LIMIT;
181
			if (acc > 0) {
182
				struct unpacked_index_entry *keep = entry;
183
				do {
184
					entry = entry->next;
185
					acc -= HASH_LIMIT;
186
				} while (acc > 0);
187
				keep->next = entry->next;
188
			}
189
			entry = entry->next;
190
		} while (entry);
191
	}
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
192
	return entries;
193
}
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
194
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
195
static struct delta_index *
196
pack_delta_index(struct unpacked_index_entry **hash, unsigned int hsize,
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
197
				 unsigned int num_entries)
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
198
{
199
	unsigned int i, memsize;
200
	struct unpacked_index_entry *entry;
201
	struct delta_index *index;
202
	struct index_entry *packed_entry, **packed_hash;
203
	void *mem;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
204
	/*
205
	 * Now create the packed index in array form
206
	 * rather than linked lists.
207
	 */
208
	memsize = sizeof(*index)
209
		+ sizeof(*packed_hash) * (hsize+1)
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
210
		+ sizeof(*packed_entry) * num_entries;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
211
	mem = malloc(memsize);
212
	if (!mem) {
213
		return NULL;
214
	}
215
216
	index = mem;
217
	index->memsize = memsize;
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
218
	index->hash_mask = hsize - 1;
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
219
	index->num_entries = num_entries;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
220
221
	mem = index->hash;
222
	packed_hash = mem;
223
	mem = packed_hash + (hsize+1);
224
	packed_entry = mem;
225
226
	for (i = 0; i < hsize; i++) {
227
		/*
228
		 * Coalesce all entries belonging to one linked list
229
		 * into consecutive array entries.
230
		 */
231
		packed_hash[i] = packed_entry;
0.23.42 by John Arbash Meinel
Change the code around again.
232
		for (entry = hash[i]; entry; entry = entry->next)
233
			*packed_entry++ = entry->entry;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
234
	}
235
236
	/* Sentinel value to indicate the length of the last hash bucket */
237
	packed_hash[hsize] = packed_entry;
238
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
239
	assert(packed_entry - (struct index_entry *)mem == num_entries);
240
	index->last_entry = (packed_entry - 1);
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
241
	return index;
242
}
243
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
244
void include_entries_from_index(struct unpacked_index_entry **hash,
245
								unsigned int *hash_count,
246
								unsigned int hsize,
247
								struct unpacked_index_entry *entry,
248
								const struct delta_index *old)
249
{
250
	unsigned int i, hmask, masked_val;
251
	struct index_entry *old_entry;
252
253
	hmask = hsize - 1;
254
	/* Iterate backwards through the existing entries
255
	 * This is so that matches early in the file end up being the first pointer
256
	 * in the linked list.
257
	 */
258
	for (old_entry = old->last_entry, i = 0; i < old->num_entries;
259
		 i++, old_entry--) {
260
		masked_val = old_entry->val & hmask;
261
		entry->entry = *old_entry;
262
		entry->next = hash[masked_val];
263
		hash[masked_val] = entry++;
264
		hash_count[masked_val]++;
265
	}
266
}
267
268
269
struct delta_index * create_delta_index(const struct source_info *src,
270
										const struct delta_index *old)
271
{
272
	unsigned int i, hsize, hmask, num_entries, prev_val, *hash_count;
273
	unsigned int total_num_entries;
0.23.42 by John Arbash Meinel
Change the code around again.
274
	const unsigned char *data, *buffer;
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
275
	struct delta_index *index;
276
	struct unpacked_index_entry *entry, **hash;
277
	void *mem;
278
	unsigned long memsize;
279
0.23.42 by John Arbash Meinel
Change the code around again.
280
	if (!src->buf || !src->size)
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
281
		return NULL;
0.23.42 by John Arbash Meinel
Change the code around again.
282
	buffer = src->buf;
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
283
284
	/* Determine index hash size.  Note that indexing skips the
285
	   first byte to allow for optimizing the Rabin's polynomial
286
	   initialization in create_delta(). */
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
287
	num_entries = (src->size - 1)  / RABIN_WINDOW;
288
	if (old != NULL)
289
		total_num_entries = num_entries + old->num_entries;
290
	else
291
		total_num_entries = num_entries;
292
	hsize = total_num_entries / 4;
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
293
	for (i = 4; (1u << i) < hsize && i < 31; i++);
294
	hsize = 1 << i;
295
	hmask = hsize - 1;
296
297
	/* allocate lookup index */
298
	memsize = sizeof(*hash) * hsize +
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
299
		  sizeof(*entry) * total_num_entries;
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
300
	mem = malloc(memsize);
301
	if (!mem)
302
		return NULL;
303
	hash = mem;
304
	mem = hash + hsize;
305
	entry = mem;
306
307
	memset(hash, 0, hsize * sizeof(*hash));
308
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
309
	/* allocate an array to count hash num_entries */
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
310
	hash_count = calloc(hsize, sizeof(*hash_count));
311
	if (!hash_count) {
312
		free(hash);
313
		return NULL;
314
	}
315
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
316
	/* then populate the index for the new data */
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
317
	prev_val = ~0;
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
318
	for (data = buffer + num_entries * RABIN_WINDOW - RABIN_WINDOW;
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
319
		 data >= buffer;
320
		 data -= RABIN_WINDOW) {
321
		unsigned int val = 0;
322
		for (i = 1; i <= RABIN_WINDOW; i++)
323
			val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
324
		if (val == prev_val) {
325
			/* keep the lowest of consecutive identical blocks */
326
			entry[-1].entry.ptr = data + RABIN_WINDOW;
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
327
			--num_entries;
328
			--total_num_entries;
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
329
		} else {
330
			prev_val = val;
331
			i = val & hmask;
332
			entry->entry.ptr = data + RABIN_WINDOW;
333
			entry->entry.val = val;
0.23.42 by John Arbash Meinel
Change the code around again.
334
			entry->entry.src = src;
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
335
			entry->next = hash[i];
336
			hash[i] = entry++;
337
			hash_count[i]++;
338
		}
339
	}
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
340
	/* If we have an existing delta_index, we want to bring its info into the
341
	 * new structure. We assume that the existing structure's objects all occur
342
	 * before the new source, so they get first precedence in the index.
343
	 */
344
	if (old != NULL)
345
		include_entries_from_index(hash, hash_count, hsize, entry, old);
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
346
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
347
	total_num_entries = limit_hash_buckets(hash, hash_count, hsize,
348
										   total_num_entries);
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
349
	free(hash_count);
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
350
	index = pack_delta_index(hash, hsize, total_num_entries);
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
351
	index->last_src = src;
0.23.36 by John Arbash Meinel
Track down a memory leak in the refactored diff-delta.c code.
352
	free(hash);
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
353
	if (!index) {
354
		return NULL;
355
	}
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
356
	return index;
357
}
358
359
void free_delta_index(struct delta_index *index)
360
{
361
	free(index);
362
}
363
364
unsigned long sizeof_delta_index(struct delta_index *index)
365
{
366
	if (index)
367
		return index->memsize;
368
	else
369
		return 0;
370
}
371
372
/*
373
 * The maximum size for any opcode sequence, including the initial header
374
 * plus Rabin window plus biggest copy.
375
 */
376
#define MAX_OP_SIZE	(5 + 5 + 1 + RABIN_WINDOW + 7)
377
378
void *
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
379
create_delta(const struct delta_index *index,
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
380
			 const void *trg_buf, unsigned long trg_size,
381
			 unsigned long *delta_size, unsigned long max_size)
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
382
{
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
383
	unsigned int i, outpos, outsize, moff, msize, val;
0.23.41 by John Arbash Meinel
Start moving the information about source buffers into the actual index_entry.
384
	const struct source_info *msource;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
385
	int inscnt;
386
	const unsigned char *ref_data, *ref_top, *data, *top;
387
	unsigned char *out;
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
388
	unsigned long source_size;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
389
390
	if (!trg_buf || !trg_size)
391
		return NULL;
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
392
	if (index == NULL)
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
393
		return NULL;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
394
395
	outpos = 0;
396
	outsize = 8192;
397
	if (max_size && outsize >= max_size)
398
		outsize = max_size + MAX_OP_SIZE + 1;
399
	out = malloc(outsize);
400
	if (!out)
401
		return NULL;
402
403
	/* store reference buffer size */
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
404
	source_size = index->last_src->size + index->last_src->agg_offset;
405
	i = source_size;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
406
	while (i >= 0x80) {
407
		out[outpos++] = i | 0x80;
408
		i >>= 7;
409
	}
410
	out[outpos++] = i;
411
412
	/* store target buffer size */
413
	i = trg_size;
414
	while (i >= 0x80) {
415
		out[outpos++] = i | 0x80;
416
		i >>= 7;
417
	}
418
	out[outpos++] = i;
419
420
	data = trg_buf;
421
	top = (const unsigned char *) trg_buf + trg_size;
422
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
423
	/* Start the matching by filling out with a simple 'insert' instruction, of
424
	 * the first RABIN_WINDOW bytes of the input.
425
	 */
426
	outpos++; /* leave a byte for the insert command */
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
427
	val = 0;
428
	for (i = 0; i < RABIN_WINDOW && data < top; i++, data++) {
429
		out[outpos++] = *data;
430
		val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
431
	}
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
432
	/* we are now setup with an insert of 'i' bytes and val contains the RABIN
433
	 * hash for those bytes, and data points to the RABIN_WINDOW+1 byte of
434
	 * input.
435
	 */
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
436
	inscnt = i;
437
438
	moff = 0;
439
	msize = 0;
0.23.41 by John Arbash Meinel
Start moving the information about source buffers into the actual index_entry.
440
	msource = NULL;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
441
	while (data < top) {
442
		if (msize < 4096) {
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
443
			/* we don't have a 'worthy enough' match yet, so let's look for
444
			 * one.
445
			 */
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
446
			struct index_entry *entry;
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
447
			/* Shift the window by one byte. */
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
448
			val ^= U[data[-RABIN_WINDOW]];
449
			val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
450
			i = val & index->hash_mask;
451
			/* TODO: When using multiple indexes like this, the hash tables
452
			 *		 mapping val => index_entry become less efficient.
453
			 *		 You end up getting a lot more collisions in the hash,
454
			 *		 which doesn't actually lead to a entry->val match.
455
			 */
456
			for (entry = index->hash[i]; entry < index->hash[i+1];
457
				 entry++) {
458
				const unsigned char *ref;
459
				const unsigned char *src;
460
				unsigned int ref_size;
461
				if (entry->val != val)
462
					continue;
463
				ref = entry->ptr;
464
				src = data;
465
				ref_data = entry->src->buf;
466
				ref_top = ref_data + entry->src->size;
467
				ref_size = ref_top - ref;
468
				/* ref_size is the longest possible match that we could make
469
				 * here. If ref_size <= msize, then we know that we cannot
470
				 * match more bytes with this location that we have already
471
				 * matched.
0.23.31 by John Arbash Meinel
Add a bit of comments about things to do.
472
				 */
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
473
				if (ref_size > top - src)
474
					ref_size = top - src;
475
				if (ref_size <= msize)
476
					break;
477
				/* See how many bytes actually match at this location. */
478
				while (ref_size-- && *src++ == *ref)
479
					ref++;
480
				if (msize < ref - entry->ptr) {
481
					/* this is our best match so far */
482
					msize = ref - entry->ptr;
483
					msource = entry->src;
484
					moff = entry->ptr - ref_data;
485
					if (msize >= 4096) /* good enough */
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
486
						break;
487
				}
488
			}
489
		}
490
491
		if (msize < 4) {
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
492
			/* The best match right now is less than 4 bytes long. So just add
493
			 * the current byte to the insert instruction. Increment the insert
494
			 * counter, and copy the byte of data into the output buffer.
495
			 */
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
496
			if (!inscnt)
497
				outpos++;
498
			out[outpos++] = *data++;
499
			inscnt++;
500
			if (inscnt == 0x7f) {
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
501
				/* We have a max length insert instruction, finalize it in the
502
				 * output.
503
				 */
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
504
				out[outpos - inscnt - 1] = inscnt;
505
				inscnt = 0;
506
			}
507
			msize = 0;
508
		} else {
509
			unsigned int left;
510
			unsigned char *op;
511
512
			if (inscnt) {
0.23.42 by John Arbash Meinel
Change the code around again.
513
				ref_data = msource->buf;
0.23.38 by John Arbash Meinel
fix the local offset problem in a slightly different way.
514
				while (moff && ref_data[moff-1] == data[-1]) {
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
515
					/* we can match one byte back */
516
					msize++;
517
					moff--;
518
					data--;
519
					outpos--;
520
					if (--inscnt)
521
						continue;
522
					outpos--;  /* remove count slot */
523
					inscnt--;  /* make it -1 */
524
					break;
525
				}
526
				out[outpos - inscnt - 1] = inscnt;
527
				inscnt = 0;
528
			}
529
530
			/* A copy op is currently limited to 64KB (pack v2) */
531
			left = (msize < 0x10000) ? 0 : (msize - 0x10000);
532
			msize -= left;
533
534
			op = out + outpos++;
535
			i = 0x80;
536
0.23.38 by John Arbash Meinel
fix the local offset problem in a slightly different way.
537
			/* moff is the offset in the local structure, for encoding, we need
538
			 * to push it into the global offset
539
			 */
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
540
			assert(moff < msource->size);
0.23.42 by John Arbash Meinel
Change the code around again.
541
			moff += msource->agg_offset;
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
542
			assert(moff + msize <= source_size);
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
543
			if (moff & 0x000000ff)
544
				out[outpos++] = moff >> 0,  i |= 0x01;
545
			if (moff & 0x0000ff00)
546
				out[outpos++] = moff >> 8,  i |= 0x02;
547
			if (moff & 0x00ff0000)
548
				out[outpos++] = moff >> 16, i |= 0x04;
549
			if (moff & 0xff000000)
550
				out[outpos++] = moff >> 24, i |= 0x08;
0.23.38 by John Arbash Meinel
fix the local offset problem in a slightly different way.
551
			/* Put it back into local coordinates, in case we have multiple
552
			 * copies in a row.
553
			 */
0.23.42 by John Arbash Meinel
Change the code around again.
554
			moff -= msource->agg_offset;
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
555
556
			if (msize & 0x00ff)
557
				out[outpos++] = msize >> 0, i |= 0x10;
558
			if (msize & 0xff00)
559
				out[outpos++] = msize >> 8, i |= 0x20;
560
561
			*op = i;
562
563
			data += msize;
564
			moff += msize;
565
			msize = left;
566
567
			if (msize < 4096) {
568
				int j;
569
				val = 0;
570
				for (j = -RABIN_WINDOW; j < 0; j++)
571
					val = ((val << 8) | data[j])
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
572
						  ^ T[val >> RABIN_SHIFT];
0.23.2 by Nicolas Pitre
Add the diff-delta.c and patch-delta.c files.
573
			}
574
		}
575
576
		if (outpos >= outsize - MAX_OP_SIZE) {
577
			void *tmp = out;
578
			outsize = outsize * 3 / 2;
579
			if (max_size && outsize >= max_size)
580
				outsize = max_size + MAX_OP_SIZE + 1;
581
			if (max_size && outpos > max_size)
582
				break;
583
			out = realloc(out, outsize);
584
			if (!out) {
585
				free(tmp);
586
				return NULL;
587
			}
588
		}
589
	}
590
591
	if (inscnt)
592
		out[outpos - inscnt - 1] = inscnt;
593
594
	if (max_size && outpos > max_size) {
595
		free(out);
596
		return NULL;
597
	}
598
599
	*delta_size = outpos;
600
	return out;
601
}
0.23.24 by John Arbash Meinel
Change the code so that we can pass in multiple sources to match against.
602
0.23.36 by John Arbash Meinel
Track down a memory leak in the refactored diff-delta.c code.
603
/* vim: noet ts=4 sw=4 sts=4
604
 */