bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
|
3735.31.2
by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts. |
1 |
# Copyright (C) 2008, 2009 Canonical Ltd
|
2 |
#
|
|
|
0.17.1
by Robert Collins
Starting point. Interface tests hooked up and failing. |
3 |
# This program is free software; you can redistribute it and/or modify
|
|
3735.31.2
by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts. |
4 |
# it under the terms of the GNU General Public License as published by
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
|
0.17.1
by Robert Collins
Starting point. Interface tests hooked up and failing. |
8 |
# This program is distributed in the hope that it will be useful,
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
|
3735.31.2
by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts. |
12 |
#
|
|
0.17.1
by Robert Collins
Starting point. Interface tests hooked up and failing. |
13 |
# You should have received a copy of the GNU General Public License
|
14 |
# along with this program; if not, write to the Free Software
|
|
|
3735.36.3
by John Arbash Meinel
Add the new address for FSF to the new files. |
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
0.17.1
by Robert Collins
Starting point. Interface tests hooked up and failing. |
16 |
|
17 |
"""Tests for group compression."""
|
|
18 |
||
19 |
import zlib |
|
20 |
||
|
3735.31.1
by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch. |
21 |
from bzrlib import ( |
22 |
groupcompress, |
|
|
3735.32.8
by John Arbash Meinel
Some tests for the LazyGroupCompressFactory |
23 |
errors, |
|
3735.32.7
by John Arbash Meinel
Implement partial decompression support. |
24 |
osutils, |
|
3735.31.1
by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch. |
25 |
tests, |
|
3735.32.20
by John Arbash Meinel
groupcompress now copies the blocks exactly as they were given. |
26 |
versionedfile, |
|
3735.31.1
by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch. |
27 |
)
|
|
0.23.58
by John Arbash Meinel
fix up the failing tests. |
28 |
from bzrlib.osutils import sha_string |
|
3735.40.5
by John Arbash Meinel
Start adding permutation tests for _groupcompress_py and _groupcompress_pyx |
29 |
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature |
|
3735.31.1
by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch. |
30 |
|
31 |
||
|
3735.40.4
by John Arbash Meinel
Factor out tests that rely on the exact bytecode. |
32 |
def load_tests(standard_tests, module, loader):
    """Multiply TestAllGroupCompressors across every available compressor.

    The pure-python compressor is always tested; the compiled (pyrex)
    compressor is added as a second scenario only when the extension is
    available.
    """
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    scenarios = [('python',
                  {'compressor': groupcompress.PythonGroupCompressor})]
    if CompiledGroupCompressFeature.available():
        scenarios.append(
            ('C', {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)
|
3735.40.4
by John Arbash Meinel
Factor out tests that rely on the exact bytecode. |
43 |
|
44 |
||
|
0.25.2
by John Arbash Meinel
First cut at meta-info as text form. |
45 |
class TestGroupCompressor(tests.TestCase):
    """Base class with chunk-comparison helpers for compressor tests."""

    def _chunks_to_repr_lines(self, chunks):
        # Join the chunks into a single string, split back into lines, and
        # repr() each line so non-ascii/control bytes show up readably.
        text = ''.join(chunks)
        return '\n'.join([repr(line) for line in text.split('\n')])

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        Both sides are converted back into lines and repr()'d so that a
        mismatch is reported as a readable diff, even when the content
        contains non-ascii characters.
        """
        expected_repr = self._chunks_to_repr_lines(expected)
        actual_repr = self._chunks_to_repr_lines(actual)
        self.assertEqualDiff(expected_repr, actual_repr)
|
61 |
||
62 |
||
63 |
class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        # A freshly created compressor has produced no output chunks yet.
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NUKK
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        # 'f' record marker, '\x0f' (15, the content length), then the raw
        # text -- presumably a fulltext record, since there is nothing
        # earlier in the group to delta against.
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        # expected_lines is a str, so this sums per-character lengths,
        # which equals len(expected_lines).
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
            '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        # Nothing was actually appended to the group.
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
            '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
            compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        # pop_last() discards the most recently compressed record, leaving
        # the chunks exactly as they were before it was added.
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)
|
131 |
||
|
0.25.2
by John Arbash Meinel
First cut at meta-info as text form. |
132 |
|
|
3735.40.4
by John Arbash Meinel
Factor out tests that rely on the exact bytecode. |
133 |
class TestPyrexGroupCompressor(TestGroupCompressor):
    # These tests assert the exact bytecode emitted by the compiled (pyrex)
    # compressor, which differs slightly from the python implementation's
    # output (see TestPythonGroupCompressor), so they are not shared.

    _test_needs_features = [CompiledGroupCompressFeature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        # Three texts sharing long common lines should compress to roughly
        # half their raw size (ratio ~1.9, checked to 1 decimal place).
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        # Snapshot the chunks so far; the second text should only append a
        # delta record after them.
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        # NOTE: several of the strings below carry no trailing comma, so
        # adjacent literals are implicitly concatenated into one chunk.
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # source and target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
211 |
||
212 |
||
213 |
class TestPythonGroupCompressor(TestGroupCompressor):
    # These tests assert the exact bytecode emitted by the pure-python
    # compressor; its delta output differs slightly from the pyrex one
    # (see TestPyrexGroupCompressor), so they are not shared.

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        # Three texts sharing long common lines should compress to roughly
        # half their raw size (ratio ~1.9, checked to 1 decimal place).
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        # Snapshot the chunks so far; the second text should only append a
        # delta record after them.
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        # NOTE: several of the strings below carry no trailing comma, so
        # adjacent literals are implicitly concatenated into one chunk.
        # The python compressor inserts 'new\n' (4 bytes, newline included)
        # where the pyrex one inserts 'new' (3 bytes), hence the different
        # delta header and copy offsets vs TestPyrexGroupCompressor.
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
290 |
||
291 |
||
|
0.25.2
by John Arbash Meinel
First cut at meta-info as text form. |
292 |
class TestGroupCompressBlock(tests.TestCase): |
293 |
||
|
3735.32.15
by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'. |
294 |
def make_block(self, key_to_text): |
295 |
"""Create a GroupCompressBlock, filling it with the given texts.""" |
|
296 |
compressor = groupcompress.GroupCompressor() |
|
297 |
start = 0 |
|
298 |
for key in sorted(key_to_text): |
|
299 |
compressor.compress(key, key_to_text[key], None) |
|
|
3735.40.18
by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock. |
300 |
locs = dict((key, (start, end)) for key, (start, _, end, _) |
301 |
in compressor.labels_deltas.iteritems()) |
|
|
3735.32.23
by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block |
302 |
block = compressor.flush() |
|
3735.40.18
by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock. |
303 |
raw_bytes = block.to_bytes() |
|
3735.32.23
by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block |
304 |
# Go through from_bytes(to_bytes()) so that we start with a compressed
|
305 |
# content object
|
|
|
3735.40.18
by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock. |
306 |
return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes) |
|
3735.32.15
by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'. |
307 |
|
|
0.25.2
by John Arbash Meinel
First cut at meta-info as text form. |
308 |
def test_from_empty_bytes(self): |
|
3735.31.1
by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch. |
309 |
self.assertRaises(ValueError, |
|
0.25.2
by John Arbash Meinel
First cut at meta-info as text form. |
310 |
groupcompress.GroupCompressBlock.from_bytes, '') |
311 |
||
|
0.25.4
by John Arbash Meinel
We at least have the rudimentary ability to encode and decode values. |
312 |
    def test_from_minimal_bytes(self):
        # 'gcb1z\n' header plus zero compressed and zero uncompressed
        # lengths is the smallest valid serialised block.
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        # Content is not expanded until explicitly requested.
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x
|
0.25.4
by John Arbash Meinel
We at least have the rudimentary ability to encode and decode values. |
322 |
|
|
4241.6.6
by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core. |
323 |
def test_from_invalid(self): |
324 |
self.assertRaises(ValueError, |
|
325 |
groupcompress.GroupCompressBlock.from_bytes, |
|
326 |
'this is not a valid header') |
|
327 |
||
|
3735.38.4
by John Arbash Meinel
Another disk format change. |
328 |
    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        # Hand-build the serialised form so the parser is tested against the
        # documented layout rather than against to_bytes().
        z_bytes = (
            'gcb1z\n' # group compress block v1 plain
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s' # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        # Parsing records the compressed payload and both lengths, but does
        # not decompress until _ensure_content() is called.
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)
346 |
||
|
0.25.2
by John Arbash Meinel
First cut at meta-info as text form. |
347 |
    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        # Serialising records both lengths on the block itself.
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header =('gcb1z\n' # group compress block v1 zlib
                          '%d\n' # Length of compressed content
                          '%d\n' # Length of uncompressed content
                         ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        # The payload after the header is plain zlib-compressed content.
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)
|
3735.32.3
by John Arbash Meinel
Start doing some direct GCVF tests. |
363 |
|
|
3735.32.7
by John Arbash Meinel
Implement partial decompression support. |
364 |
def test_partial_decomp(self): |
365 |
content_chunks = [] |
|
366 |
# We need a sufficient amount of data so that zlib.decompress has
|
|
367 |
# partial decompression to work with. Most auto-generated data
|
|
368 |
# compresses a bit too well, we want a combination, so we combine a sha
|
|
369 |
# hash with compressible data.
|
|
370 |
for i in xrange(2048): |
|
371 |
next_content = '%d\nThis is a bit of duplicate text\n' % (i,) |
|
372 |
content_chunks.append(next_content) |
|
373 |
next_sha1 = osutils.sha_string(next_content) |
|
374 |
content_chunks.append(next_sha1 + '\n') |
|
375 |
content = ''.join(content_chunks) |
|
376 |
self.assertEqual(158634, len(content)) |
|
377 |
z_content = zlib.compress(content) |
|
378 |
self.assertEqual(57182, len(z_content)) |
|
379 |
block = groupcompress.GroupCompressBlock() |
|
380 |
block._z_content = z_content |
|
381 |
block._z_content_length = len(z_content) |
|
|
3735.32.8
by John Arbash Meinel
Some tests for the LazyGroupCompressFactory |
382 |
block._compressor_name = 'zlib' |
|
3735.32.7
by John Arbash Meinel
Implement partial decompression support. |
383 |
block._content_length = 158634 |
384 |
self.assertIs(None, block._content) |
|
385 |
block._ensure_content(100) |
|
386 |
self.assertIsNot(None, block._content) |
|
387 |
# We have decompressed at least 100 bytes
|
|
388 |
self.assertTrue(len(block._content) >= 100) |
|
389 |
# We have not decompressed the whole content
|
|
390 |
self.assertTrue(len(block._content) < 158634) |
|
391 |
self.assertEqualDiff(content[:len(block._content)], block._content) |
|
392 |
# ensuring content that we already have shouldn't cause any more data
|
|
393 |
# to be extracted
|
|
394 |
cur_len = len(block._content) |
|
395 |
block._ensure_content(cur_len - 10) |
|
396 |
self.assertEqual(cur_len, len(block._content)) |
|
397 |
# Now we want a bit more content
|
|
398 |
cur_len += 10 |
|
399 |
block._ensure_content(cur_len) |
|
400 |
self.assertTrue(len(block._content) >= cur_len) |
|
401 |
self.assertTrue(len(block._content) < 158634) |
|
402 |
self.assertEqualDiff(content[:len(block._content)], block._content) |
|
403 |
# And now lets finish
|
|
404 |
block._ensure_content(158634) |
|
405 |
self.assertEqualDiff(content, block._content) |
|
|
3735.32.8
by John Arbash Meinel
Some tests for the LazyGroupCompressFactory |
406 |
# And the decompressor is finalized
|
|
3735.32.7
by John Arbash Meinel
Implement partial decompression support. |
407 |
self.assertIs(None, block._z_content_decompressor) |
408 |
||
|
3735.32.11
by John Arbash Meinel
Add tests for the ability to do partial decompression without knowing the final length. |
409 |
def test_partial_decomp_no_known_length(self): |
410 |
content_chunks = [] |
|
411 |
for i in xrange(2048): |
|
412 |
next_content = '%d\nThis is a bit of duplicate text\n' % (i,) |
|
413 |
content_chunks.append(next_content) |
|
414 |
next_sha1 = osutils.sha_string(next_content) |
|
415 |
content_chunks.append(next_sha1 + '\n') |
|
416 |
content = ''.join(content_chunks) |
|
417 |
self.assertEqual(158634, len(content)) |
|
418 |
z_content = zlib.compress(content) |
|
419 |
self.assertEqual(57182, len(z_content)) |
|
420 |
block = groupcompress.GroupCompressBlock() |
|
421 |
block._z_content = z_content |
|
422 |
block._z_content_length = len(z_content) |
|
423 |
block._compressor_name = 'zlib' |
|
424 |
block._content_length = None # Don't tell the decompressed length |
|
425 |
self.assertIs(None, block._content) |
|
426 |
block._ensure_content(100) |
|
427 |
self.assertIsNot(None, block._content) |
|
428 |
# We have decompressed at least 100 bytes
|
|
429 |
self.assertTrue(len(block._content) >= 100) |
|
430 |
# We have not decompressed the whole content
|
|
431 |
self.assertTrue(len(block._content) < 158634) |
|
432 |
self.assertEqualDiff(content[:len(block._content)], block._content) |
|
433 |
# ensuring content that we already have shouldn't cause any more data
|
|
434 |
# to be extracted
|
|
435 |
cur_len = len(block._content) |
|
436 |
block._ensure_content(cur_len - 10) |
|
437 |
self.assertEqual(cur_len, len(block._content)) |
|
438 |
# Now we want a bit more content
|
|
439 |
cur_len += 10 |
|
440 |
block._ensure_content(cur_len) |
|
441 |
self.assertTrue(len(block._content) >= cur_len) |
|
442 |
self.assertTrue(len(block._content) < 158634) |
|
443 |
self.assertEqualDiff(content[:len(block._content)], block._content) |
|
444 |
# And now lets finish
|
|
445 |
block._ensure_content() |
|
446 |
self.assertEqualDiff(content, block._content) |
|
447 |
# And the decompressor is finalized
|
|
448 |
self.assertIs(None, block._z_content_decompressor) |
|
449 |
||
|
4300.1.1
by John Arbash Meinel
Add the ability to convert a gc block into 'human readable' form. |
450 |
def test__dump(self): |
451 |
dup_content = 'some duplicate content\nwhich is sufficiently long\n' |
|
452 |
key_to_text = {('1',): dup_content + '1 unique\n', |
|
453 |
('2',): dup_content + '2 extra special\n'} |
|
454 |
locs, block = self.make_block(key_to_text) |
|
455 |
self.assertEqual([('f', len(key_to_text[('1',)])), |
|
456 |
('d', 21, len(key_to_text[('2',)]), |
|
457 |
[('c', 2, len(dup_content)), |
|
458 |
('i', len('2 extra special\n'), '') |
|
459 |
]),
|
|
460 |
], block._dump()) |
|
461 |
||
|
3735.32.3
by John Arbash Meinel
Start doing some direct GCVF tests. |
462 |
|
463 |
class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):
    """Base class providing a helper to build a groupcompress VF."""

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.'):
        """Return a groupcompress VersionedFiles stored under 'dir'."""
        transport = self.get_transport(dir)
        transport.ensure_base()
        factory = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength)
        vf = factory(transport)
        if do_cleanup:
            # Make sure the pack group is torn down at the end of the test.
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf
|
474 |
||
|
3735.32.8
by John Arbash Meinel
Some tests for the LazyGroupCompressFactory |
475 |
|
476 |
class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):
    """Tests for the groupcompress VersionedFiles implementation."""

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        for name in 'abcd':
            vf.add_lines((name,), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_re_uses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            # Yield fulltexts chained parent -> child, so they end up
            # compressed together.
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                yield versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(letter,) for letter in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            # The first record of each group carries the block itself; the
            # rest only reference it.
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        # a-d share one block object; e-h share another, different one.
        for letter in 'abcd':
            key = (letter,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for letter in 'efgh':
            key = (letter,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups
        # in the target vf, but the groups themselves should not be disturbed.
        vf2.insert_record_stream(vf.get_record_stream(
            [(letter,) for letter in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(letter,) for letter in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            # Same chained-fulltext generator as the re_uses_blocks test.
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                yield versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
            [(letter,) for letter in 'abcdefgh'],
            'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups
        # in the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(letter,) for letter in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything
        # in a single new block.
        stream = vf2.get_record_stream([(letter,) for letter in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)
|
608 |
||
|
3735.32.8
by John Arbash Meinel
Some tests for the LazyGroupCompressFactory |
609 |
|
|
3735.32.14
by John Arbash Meinel
Move the tests over to testing the LazyGroupContentManager object. |
610 |
class TestLazyGroupCompress(tests.TestCaseWithTransport):
    """Tests for _LazyGroupContentManager."""

    # Sample texts: key1-key3 share a compressible final line, and key4
    # repeats most of key3's bytes, giving the delta compressor real work.
    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
    }
|
|
3735.32.8
by John Arbash Meinel
Some tests for the LazyGroupCompressFactory |
624 |
def make_block(self, key_to_text): |
625 |
"""Create a GroupCompressBlock, filling it with the given texts.""" |
|
626 |
compressor = groupcompress.GroupCompressor() |
|
627 |
start = 0 |
|
628 |
for key in sorted(key_to_text): |
|
629 |
compressor.compress(key, key_to_text[key], None) |
|
|
3735.40.18
by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock. |
630 |
locs = dict((key, (start, end)) for key, (start, _, end, _) |
631 |
in compressor.labels_deltas.iteritems()) |
|
|
3735.32.23
by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block |
632 |
block = compressor.flush() |
633 |
raw_bytes = block.to_bytes() |
|
|
3735.40.18
by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock. |
634 |
return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes) |
|
3735.32.8
by John Arbash Meinel
Some tests for the LazyGroupCompressFactory |
635 |
|
|
3735.40.18
by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock. |
636 |
def add_key_to_manager(self, key, locations, block, manager): |
637 |
start, end = locations[key] |
|
638 |
manager.add_factory(key, (), start, end) |
|
|
3735.32.15
by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'. |
639 |
|
|
3735.32.8
by John Arbash Meinel
Some tests for the LazyGroupCompressFactory |
640 |
def test_get_fulltexts(self): |
|
3735.40.18
by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock. |
641 |
locations, block = self.make_block(self._texts) |
|
3735.32.17
by John Arbash Meinel
We now round-trip the wire_bytes. |
642 |
manager = groupcompress._LazyGroupContentManager(block) |
|
3735.40.18
by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock. |
643 |
self.add_key_to_manager(('key1',), locations, block, manager) |
644 |
self.add_key_to_manager(('key2',), locations, block, manager) |
|
|
3735.32.15
by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'. |
645 |
result_order = [] |
646 |
for record in manager.get_record_stream(): |
|
647 |
result_order.append(record.key) |
|
648 |
text = self._texts[record.key] |
|
649 |
self.assertEqual(text, record.get_bytes_as('fulltext')) |
|
650 |
self.assertEqual([('key1',), ('key2',)], result_order) |
|
651 |
||
652 |
# If we build the manager in the opposite order, we should get them
|
|
653 |
# back in the opposite order
|
|
|
3735.32.17
by John Arbash Meinel
We now round-trip the wire_bytes. |
654 |
manager = groupcompress._LazyGroupContentManager(block) |
|
3735.40.18
by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock. |
655 |
self.add_key_to_manager(('key2',), locations, block, manager) |
656 |
self.add_key_to_manager(('key1',), locations, block, manager) |
|
|
3735.32.15
by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'. |
657 |
result_order = [] |
658 |
for record in manager.get_record_stream(): |
|
659 |
result_order.append(record.key) |
|
660 |
text = self._texts[record.key] |
|
661 |
self.assertEqual(text, record.get_bytes_as('fulltext')) |
|
662 |
self.assertEqual([('key2',), ('key1',)], result_order) |
|
663 |
||
|
3735.32.16
by John Arbash Meinel
We now have a general header for the GC block. |
664 |
def test__wire_bytes_no_keys(self): |
|
3735.40.18
by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock. |
665 |
locations, block = self.make_block(self._texts) |
|
3735.32.17
by John Arbash Meinel
We now round-trip the wire_bytes. |
666 |
manager = groupcompress._LazyGroupContentManager(block) |
|
3735.32.16
by John Arbash Meinel
We now have a general header for the GC block. |
667 |
wire_bytes = manager._wire_bytes() |
|
3735.32.17
by John Arbash Meinel
We now round-trip the wire_bytes. |
668 |
block_length = len(block.to_bytes()) |
|
3735.32.24
by John Arbash Meinel
_wire_bytes() now strips groups as necessary, as does _insert_record_stream |
669 |
# We should have triggered a strip, since we aren't using any content
|
670 |
stripped_block = manager._block.to_bytes() |
|
671 |
self.assertTrue(block_length > len(stripped_block)) |
|
672 |
empty_z_header = zlib.compress('') |
|
673 |
self.assertEqual('groupcompress-block\n' |
|
674 |
'8\n' # len(compress('')) |
|
675 |
'0\n' # len('') |
|
676 |
'%d\n'# compressed block len |
|
677 |
'%s' # zheader |
|
678 |
'%s' # block |
|
679 |
% (len(stripped_block), empty_z_header, |
|
680 |
stripped_block), |
|
681 |
wire_bytes) |
|
|
3735.32.16
by John Arbash Meinel
We now have a general header for the GC block. |
682 |
|
|
3735.32.15
by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'. |
683 |
def test__wire_bytes(self): |
|
3735.40.18
by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock. |
684 |
locations, block = self.make_block(self._texts) |
|
3735.32.17
by John Arbash Meinel
We now round-trip the wire_bytes. |
685 |
manager = groupcompress._LazyGroupContentManager(block) |
|
3735.40.18
by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock. |
686 |
self.add_key_to_manager(('key1',), locations, block, manager) |
687 |
self.add_key_to_manager(('key4',), locations, block, manager) |
|
|
3735.32.17
by John Arbash Meinel
We now round-trip the wire_bytes. |
688 |
block_bytes = block.to_bytes() |
|
3735.32.16
by John Arbash Meinel
We now have a general header for the GC block. |
689 |
wire_bytes = manager._wire_bytes() |
690 |
(storage_kind, z_header_len, header_len, |
|
691 |
block_len, rest) = wire_bytes.split('\n', 4) |
|
692 |
z_header_len = int(z_header_len) |
|
693 |
header_len = int(header_len) |
|
694 |
block_len = int(block_len) |
|
695 |
self.assertEqual('groupcompress-block', storage_kind) |
|
|
3735.38.2
by John Arbash Meinel
Make the text for key4 slightly longer, rather than include key3. |
696 |
self.assertEqual(33, z_header_len) |
697 |
self.assertEqual(25, header_len) |
|
|
3735.32.17
by John Arbash Meinel
We now round-trip the wire_bytes. |
698 |
self.assertEqual(len(block_bytes), block_len) |
|
3735.32.16
by John Arbash Meinel
We now have a general header for the GC block. |
699 |
z_header = rest[:z_header_len] |
700 |
header = zlib.decompress(z_header) |
|
701 |
self.assertEqual(header_len, len(header)) |
|
|
3735.40.18
by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock. |
702 |
entry1 = locations[('key1',)] |
703 |
entry4 = locations[('key4',)] |
|
|
3735.32.16
by John Arbash Meinel
We now have a general header for the GC block. |
704 |
self.assertEqualDiff('key1\n' |
705 |
'\n' # no parents |
|
706 |
'%d\n' # start offset |
|
|
3735.38.2
by John Arbash Meinel
Make the text for key4 slightly longer, rather than include key3. |
707 |
'%d\n' # end offset |
|
3735.32.16
by John Arbash Meinel
We now have a general header for the GC block. |
708 |
'key4\n' |
709 |
'\n' |
|
710 |
'%d\n' |
|
711 |
'%d\n' |
|
|
3735.40.18
by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock. |
712 |
% (entry1[0], entry1[1], |
713 |
entry4[0], entry4[1]), |
|
|
3735.32.16
by John Arbash Meinel
We now have a general header for the GC block. |
714 |
header) |
715 |
z_block = rest[z_header_len:] |
|
|
3735.32.17
by John Arbash Meinel
We now round-trip the wire_bytes. |
716 |
self.assertEqual(block_bytes, z_block) |
717 |
||
718 |
def test_from_bytes(self): |
|
|
3735.40.18
by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock. |
719 |
locations, block = self.make_block(self._texts) |
|
3735.32.17
by John Arbash Meinel
We now round-trip the wire_bytes. |
720 |
manager = groupcompress._LazyGroupContentManager(block) |
|
3735.40.18
by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock. |
721 |
self.add_key_to_manager(('key1',), locations, block, manager) |
722 |
self.add_key_to_manager(('key4',), locations, block, manager) |
|
|
3735.32.17
by John Arbash Meinel
We now round-trip the wire_bytes. |
723 |
wire_bytes = manager._wire_bytes() |
724 |
self.assertStartsWith(wire_bytes, 'groupcompress-block\n') |
|
|
3735.32.18
by John Arbash Meinel
We now support generating a network stream. |
725 |
manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes) |
|
3735.32.17
by John Arbash Meinel
We now round-trip the wire_bytes. |
726 |
self.assertIsInstance(manager, groupcompress._LazyGroupContentManager) |
|
3735.38.2
by John Arbash Meinel
Make the text for key4 slightly longer, rather than include key3. |
727 |
self.assertEqual(2, len(manager._factories)) |
|
3735.32.17
by John Arbash Meinel
We now round-trip the wire_bytes. |
728 |
self.assertEqual(block._z_content, manager._block._z_content) |
729 |
result_order = [] |
|
730 |
for record in manager.get_record_stream(): |
|
731 |
result_order.append(record.key) |
|
732 |
text = self._texts[record.key] |
|
733 |
self.assertEqual(text, record.get_bytes_as('fulltext')) |
|
|
3735.38.2
by John Arbash Meinel
Make the text for key4 slightly longer, rather than include key3. |
734 |
self.assertEqual([('key1',), ('key4',)], result_order) |
|
3735.32.23
by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block |
735 |
|
736 |
def test__check_rebuild_no_changes(self): |
|
|
3735.40.18
by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock. |
737 |
locations, block = self.make_block(self._texts) |
|
3735.32.23
by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block |
738 |
manager = groupcompress._LazyGroupContentManager(block) |
739 |
# Request all the keys, which ensures that we won't rebuild
|
|
|
3735.40.18
by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock. |
740 |
self.add_key_to_manager(('key1',), locations, block, manager) |
741 |
self.add_key_to_manager(('key2',), locations, block, manager) |
|
742 |
self.add_key_to_manager(('key3',), locations, block, manager) |
|
743 |
self.add_key_to_manager(('key4',), locations, block, manager) |
|
|
3735.32.23
by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block |
744 |
manager._check_rebuild_block() |
745 |
self.assertIs(block, manager._block) |
|
746 |
||
747 |
def test__check_rebuild_only_one(self): |
|
|
3735.40.18
by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock. |
748 |
locations, block = self.make_block(self._texts) |
|
3735.32.23
by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block |
749 |
manager = groupcompress._LazyGroupContentManager(block) |
750 |
# Request just the first key, which should trigger a 'strip' action
|
|
|
3735.40.18
by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock. |
751 |
self.add_key_to_manager(('key1',), locations, block, manager) |
|
3735.32.23
by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block |
752 |
manager._check_rebuild_block() |
753 |
self.assertIsNot(block, manager._block) |
|
754 |
self.assertTrue(block._content_length > manager._block._content_length) |
|
755 |
# We should be able to still get the content out of this block, though
|
|
756 |
# it should only have 1 entry
|
|
757 |
for record in manager.get_record_stream(): |
|
758 |
self.assertEqual(('key1',), record.key) |
|
759 |
self.assertEqual(self._texts[record.key], |
|
760 |
record.get_bytes_as('fulltext')) |
|
761 |
||
762 |
def test__check_rebuild_middle(self): |
|
|
3735.40.18
by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock. |
763 |
locations, block = self.make_block(self._texts) |
|
3735.32.23
by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block |
764 |
manager = groupcompress._LazyGroupContentManager(block) |
765 |
# Request a small key in the middle should trigger a 'rebuild'
|
|
|
3735.40.18
by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock. |
766 |
self.add_key_to_manager(('key4',), locations, block, manager) |
|
3735.32.23
by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block |
767 |
manager._check_rebuild_block() |
768 |
self.assertIsNot(block, manager._block) |
|
769 |
self.assertTrue(block._content_length > manager._block._content_length) |
|
770 |
for record in manager.get_record_stream(): |
|
771 |
self.assertEqual(('key4',), record.key) |
|
772 |
self.assertEqual(self._texts[record.key], |
|
773 |
record.get_bytes_as('fulltext')) |