@@ -81 +81 @@
     def test_one_nosha_delta(self):
         # diff against NUKK
         compressor = self.compressor()
-        sha1, start_point, end_point, _ = compressor.compress(('label',),
-            b'strange\ncommon\n', None)
+        text = b'strange\ncommon\n'
+        sha1, start_point, end_point, _ = compressor.compress(
+            ('label',), [text], len(text), None)
         self.assertEqual(sha_string(b'strange\ncommon\n'), sha1)
         expected_lines = b'f\x0fstrange\ncommon\n'
         self.assertEqual(expected_lines, b''.join(compressor.chunks))
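The hunk above is representative of the whole patch: compress() now takes a list of byte chunks plus their total length instead of a single bytes object, with the expected sha still passed last (None lets the compressor compute it). A minimal sketch of the new call shape, assuming breezy.groupcompress is importable and using the module-level GroupCompressor that the later hunks also use:

    from breezy import groupcompress

    compressor = groupcompress.GroupCompressor()
    text = b'strange\ncommon\n'
    # New signature: key, list of chunks, total length in bytes, expected sha.
    sha1, start_point, end_point, kind = compressor.compress(
        ('label',), [text], len(text), None)
    # The accumulated output is available as a list of chunks.
    print(b''.join(compressor.chunks))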
@@ -92 +93 @@
     def test_empty_content(self):
         compressor = self.compressor()
         # Adding empty bytes should return the 'null' record
-        sha1, start_point, end_point, kind = compressor.compress(('empty',),
-            b'', None)
+        sha1, start_point, end_point, kind = compressor.compress(
+            ('empty',), [], 0, None)
         self.assertEqual(0, start_point)
         self.assertEqual(0, end_point)
         self.assertEqual('fulltext', kind)
         self.assertEqual(0, compressor.endpoint)
         self.assertEqual([], compressor.chunks)
         # Even after adding some content
-        compressor.compress(('content',), b'some\nbytes\n', None)
+        text = b'some\nbytes\n'
+        compressor.compress(('content',), [text], len(text), None)
         self.assertTrue(compressor.endpoint > 0)
-        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
-            b'', None)
+        sha1, start_point, end_point, kind = compressor.compress(
+            ('empty2',), [], 0, None)
         self.assertEqual(0, start_point)
         self.assertEqual(0, end_point)
         self.assertEqual('fulltext', kind)
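As the hunk above shows, the null-record behaviour carries over: an empty chunk list with an explicit length of 0 yields the 'fulltext' null record without advancing the output. A small self-contained sketch under the same assumptions:

    from breezy import groupcompress

    compressor = groupcompress.GroupCompressor()
    # Empty content: no chunks, zero length, sha computed by the compressor.
    sha1, start, end, kind = compressor.compress(('empty',), [], 0, None)
    assert (start, end, kind) == (0, 0, 'fulltext')
    assert compressor.endpoint == 0 and compressor.chunks == []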
@@ -114 +116 @@
         # Knit fetching will try to reconstruct texts locally which results in
         # reading something that is in the compressor stream already.
         compressor = self.compressor()
-        sha1_1, _, _, _ = compressor.compress(('label',),
-            b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
+        text = b'strange\ncommon long line\nthat needs a 16 byte match\n'
+        sha1_1, _, _, _ = compressor.compress(
+            ('label',), [text], len(text), None)
         expected_lines = list(compressor.chunks)
-        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
-            b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
+        text = b'common long line\nthat needs a 16 byte match\ndifferent\n'
+        sha1_2, _, end_point, _ = compressor.compress(
+            ('newlabel',), [text], len(text), None)
         # get the first out
-        self.assertEqual((b'strange\ncommon long line\n'
-                          b'that needs a 16 byte match\n', sha1_1),
+        self.assertEqual(([b'strange\ncommon long line\n'
+                           b'that needs a 16 byte match\n'], sha1_1),
                          compressor.extract(('label',)))
-        self.assertEqual((b'common long line\nthat needs a 16 byte match\n'
-                          b'different\n', sha1_2),
+        self.assertEqual(([b'common long line\nthat needs a 16 byte match\n'
+                           b'different\n'], sha1_2),
                          compressor.extract(('newlabel',)))

     def test_pop_last(self):
         compressor = self.compressor()
-        _, _, _, _ = compressor.compress(('key1',),
-            b'some text\nfor the first entry\n', None)
+        text = b'some text\nfor the first entry\n'
+        _, _, _, _ = compressor.compress(
+            ('key1',), [text], len(text), None)
         expected_lines = list(compressor.chunks)
-        _, _, _, _ = compressor.compress(('key2',),
-            b'some text\nfor the second entry\n', None)
+        text = b'some text\nfor the second entry\n'
+        _, _, _, _ = compressor.compress(
+            ('key2',), [text], len(text), None)
         compressor.pop_last()
         self.assertEqual(expected_lines, compressor.chunks)
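The extract() assertions above change shape as well: extract() now returns the stored text as a list of chunks paired with its sha1, so callers join the chunks when they need a single byte string. A sketch, under the same assumptions as before:

    from breezy import groupcompress

    compressor = groupcompress.GroupCompressor()
    text = b'strange\ncommon long line\nthat needs a 16 byte match\n'
    compressor.compress(('label',), [text], len(text), None)
    # extract() yields (chunks, sha1) rather than (bytes, sha1).
    chunks, sha1 = compressor.extract(('label',))
    assert b''.join(chunks) == text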
@@ -147 +153 @@
     def test_stats(self):
         compressor = self.compressor()
-        compressor.compress(('label',),
-            b'common very very long line\n'
-            b'plus more text\n', None)
-        compressor.compress(('newlabel',),
-            b'common very very long line\n'
-            b'moredifferent\n', None)
-        compressor.compress(('label3',),
-            b'common very very long line\n'
-            b'moredifferent\n', None)
+        chunks = [b'strange\n',
+                  b'common very very long line\n',
+            ('label',), chunks, sum(map(len, chunks)), None)
+                  b'common very very long line\n',
+            chunks, sum(map(len, chunks)), None)
+                  b'common very very long line\n',
+            ('label3',), chunks, sum(map(len, chunks)), None)
         self.assertAlmostEqual(1.9, compressor.ratio(), 1)
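The stats test follows the same pattern at a larger scale: each label gets a list of chunks plus the summed length, and the compression-ratio assertion is unchanged. A minimal sketch of one such call, assuming the same GroupCompressor:

    from breezy import groupcompress

    compressor = groupcompress.GroupCompressor()
    chunks = [b'strange\n',
              b'common very very long line\n',
              b'plus more text\n']
    # The caller supplies the total length rather than having it recomputed.
    compressor.compress(('label',), chunks, sum(map(len, chunks)), None)
    print(compressor.ratio())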
@@ -166 +178 @@
     def test_two_nosha_delta(self):
         compressor = self.compressor()
-        sha1_1, _, _, _ = compressor.compress(('label',),
-            b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
+        text = b'strange\ncommon long line\nthat needs a 16 byte match\n'
+        sha1_1, _, _, _ = compressor.compress(('label',), [text], len(text), None)
         expected_lines = list(compressor.chunks)
-        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
-            b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
-        self.assertEqual(sha_string(b'common long line\n'
-                                    b'that needs a 16 byte match\n'
-                                    b'different\n'), sha1_2)
+        text = b'common long line\nthat needs a 16 byte match\ndifferent\n'
+        sha1_2, start_point, end_point, _ = compressor.compress(
+            ('newlabel',), [text], len(text), None)
+        self.assertEqual(sha_string(text), sha1_2)
         expected_lines.extend([
             # 'delta', delta length
@@ -190 +201 @@
         # The first interesting test: make a change that should use lines from
         # both parents.
         compressor = self.compressor()
-        sha1_1, _, _, _ = compressor.compress(('label',),
-            b'strange\ncommon very very long line\nwith some extra text\n', None)
-        sha1_2, _, _, _ = compressor.compress(('newlabel',),
-            b'different\nmoredifferent\nand then some more\n', None)
+        text = b'strange\ncommon very very long line\nwith some extra text\n'
+        sha1_1, _, _, _ = compressor.compress(
+            ('label',), [text], len(text), None)
+        text = b'different\nmoredifferent\nand then some more\n'
+        sha1_2, _, _, _ = compressor.compress(
+            ('newlabel',), [text], len(text), None)
         expected_lines = list(compressor.chunks)
-        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
-            b'new\ncommon very very long line\nwith some extra text\n'
-            b'different\nmoredifferent\nand then some more\n',
-            None)
-        self.assertEqual(
-            sha_string(b'new\ncommon very very long line\nwith some extra text\n'
-                       b'different\nmoredifferent\nand then some more\n'),
-            sha1_3)
+        text = (b'new\ncommon very very long line\nwith some extra text\n'
+                b'different\nmoredifferent\nand then some more\n')
+        sha1_3, start_point, end_point, _ = compressor.compress(
+            ('label3',), [text], len(text), None)
+        self.assertEqual(sha_string(text), sha1_3)
         expected_lines.extend([
             # 'delta', delta length
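A side effect of keeping the text in a local variable, visible in the two hunks above, is that the expected sha can be computed from the same value that was compressed instead of a re-spelled byte literal. A sketch, assuming sha_string comes from breezy.osutils as used by these tests:

    from breezy import groupcompress
    from breezy.osutils import sha_string  # assumed location of the helper

    compressor = groupcompress.GroupCompressor()
    text = (b'new\ncommon very very long line\nwith some extra text\n'
            b'different\nmoredifferent\nand then some more\n')
    sha1_3, _, _, _ = compressor.compress(('label3',), [text], len(text), None)
    # The expectation stays in sync with whatever `text` holds.
    assert sha_string(text) == sha1_3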
@@ -226 +236 @@
     def test_stats(self):
         compressor = self.compressor()
-        compressor.compress(('label',),
-            b'common very very long line\n'
-            b'plus more text\n', None)
-        compressor.compress(('newlabel',),
-            b'common very very long line\n'
-            b'moredifferent\n', None)
-        compressor.compress(('label3',),
-            b'common very very long line\n'
-            b'moredifferent\n', None)
+        chunks = [b'strange\n',
+                  b'common very very long line\n',
+            ('label',), chunks, sum(map(len, chunks)), None)
+                  b'common very very long line\n',
+            ('newlabel',), chunks, sum(map(len, chunks)), None)
+                  b'common very very long line\n',
+            chunks, sum(map(len, chunks)), None)
         self.assertAlmostEqual(1.9, compressor.ratio(), 1)
@@ -245 +261 @@
     def test_two_nosha_delta(self):
         compressor = self.compressor()
-        sha1_1, _, _, _ = compressor.compress(('label',),
-            b'strange\ncommon long line\nthat needs a 16 byte match\n', None)
+        text = b'strange\ncommon long line\nthat needs a 16 byte match\n'
+        sha1_1, _, _, _ = compressor.compress(
+            ('label',), [text], len(text), None)
         expected_lines = list(compressor.chunks)
-        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
-            b'common long line\nthat needs a 16 byte match\ndifferent\n', None)
-        self.assertEqual(sha_string(b'common long line\n'
-                                    b'that needs a 16 byte match\n'
-                                    b'different\n'), sha1_2)
+        text = b'common long line\nthat needs a 16 byte match\ndifferent\n'
+        sha1_2, start_point, end_point, _ = compressor.compress(
+            ('newlabel',), [text], len(text), None)
+        self.assertEqual(sha_string(text), sha1_2)
         expected_lines.extend([
             # 'delta', delta length
@@ -269 +285 @@
         # The first interesting test: make a change that should use lines from
         # both parents.
         compressor = self.compressor()
-        sha1_1, _, _, _ = compressor.compress(('label',),
-            b'strange\ncommon very very long line\nwith some extra text\n', None)
-        sha1_2, _, _, _ = compressor.compress(('newlabel',),
-            b'different\nmoredifferent\nand then some more\n', None)
+        text = b'strange\ncommon very very long line\nwith some extra text\n'
+        sha1_1, _, _, _ = compressor.compress(
+            ('label',), [text], len(text), None)
+        text = b'different\nmoredifferent\nand then some more\n'
+        sha1_2, _, _, _ = compressor.compress(
+            ('newlabel',), [text], len(text), None)
         expected_lines = list(compressor.chunks)
-        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
-            b'new\ncommon very very long line\nwith some extra text\n'
-            b'different\nmoredifferent\nand then some more\n',
-            None)
-        self.assertEqual(
-            sha_string(b'new\ncommon very very long line\nwith some extra text\n'
-                       b'different\nmoredifferent\nand then some more\n'),
-            sha1_3)
+        text = (b'new\ncommon very very long line\nwith some extra text\n'
+                b'different\nmoredifferent\nand then some more\n')
+        sha1_3, start_point, end_point, _ = compressor.compress(
+            ('label3',), [text], len(text), None)
+        self.assertEqual(sha_string(text), sha1_3)
         expected_lines.extend([
             # 'delta', delta length
@@ -305 +320 @@
         compressor = groupcompress.GroupCompressor()
         for key in sorted(key_to_text):
-            compressor.compress(key, key_to_text[key], None)
+            compressor.compress(
+                key, [key_to_text[key]], len(key_to_text[key]), None)
         locs = dict((key, (start, end)) for key, (start, _, end, _)
                     in compressor.labels_deltas.items())
         block = compressor.flush()
@@ -946 +962 @@
         compressor = groupcompress.GroupCompressor()
         for key in sorted(key_to_text):
-            compressor.compress(key, key_to_text[key], None)
+            compressor.compress(
+                key, [key_to_text[key]], len(key_to_text[key]), None)
         locs = dict((key, (start, end)) for key, (start, _, end, _)
                     in compressor.labels_deltas.items())
         block = compressor.flush()
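The final two hunks apply the same change to the helper code that builds a block from a dict of texts; only the compress() call changes, while the locations dict and flush() stay the same. A self-contained sketch of that loop under the same assumptions, with a hypothetical key_to_text mapping:

    from breezy import groupcompress

    key_to_text = {(b'a',): b'text for a\n', (b'b',): b'text for b\n'}  # illustrative data
    compressor = groupcompress.GroupCompressor()
    for key in sorted(key_to_text):
        compressor.compress(key, [key_to_text[key]], len(key_to_text[key]), None)
    # Map each key to its (start, end) byte range, then flush to a block.
    locs = dict((key, (start, end)) for key, (start, _, end, _)
                in compressor.labels_deltas.items())
    block = compressor.flush()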