bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 
2890.2.3
by Robert Collins
 * New module ``bzrlib.bisect_multi`` with generic multiple-bisection-at-once  | 
1  | 
# Copyright (C) 2007 Canonical Ltd
 | 
2  | 
#
 | 
|
3  | 
# This program is free software; you can redistribute it and/or modify
 | 
|
4  | 
# it under the terms of the GNU General Public License as published by
 | 
|
5  | 
# the Free Software Foundation; either version 2 of the License, or
 | 
|
6  | 
# (at your option) any later version.
 | 
|
7  | 
#
 | 
|
8  | 
# This program is distributed in the hope that it will be useful,
 | 
|
9  | 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|
10  | 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|
11  | 
# GNU General Public License for more details.
 | 
|
12  | 
#
 | 
|
13  | 
# You should have received a copy of the GNU General Public License
 | 
|
14  | 
# along with this program; if not, write to the Free Software
 | 
|
15  | 
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | 
|
16  | 
||
17  | 
"""Tests for bisect_multi."""
 | 
|
18  | 
||
19  | 
from bzrlib import errors  | 
|
20  | 
from bzrlib.bisect_multi import bisect_multi_bytes  | 
|
21  | 
from bzrlib.tests import TestCase  | 
|
22  | 
||
23  | 
||
24  | 
class TestBisectMultiBytes(TestCase):  | 
|
25  | 
||
26  | 
def test_lookup_no_keys_no_calls(self):  | 
|
27  | 
calls = []  | 
|
28  | 
def missing_content(location_keys):  | 
|
29  | 
calls.append(location_keys)  | 
|
30  | 
return ((location_key, False) for location_key in location_keys)  | 
|
31  | 
self.assertEqual([],  | 
|
32  | 
list(bisect_multi_bytes(missing_content, 100, [])))  | 
|
33  | 
self.assertEqual([], calls)  | 
|
34  | 
||
35  | 
def test_lookup_missing_key_no_content(self):  | 
|
36  | 
"""Doing a lookup in a zero-length file still does a single request.  | 
|
37  | 
        
 | 
|
38  | 
        This makes sense because the bisector cannot tell how long content is
 | 
|
39  | 
        and its more flexible to only stop when the content object says 'False'
 | 
|
40  | 
        for a given location, key pair.
 | 
|
41  | 
        """
 | 
|
42  | 
calls = []  | 
|
43  | 
def missing_content(location_keys):  | 
|
44  | 
calls.append(location_keys)  | 
|
45  | 
return ((location_key, False) for location_key in location_keys)  | 
|
46  | 
self.assertEqual([],  | 
|
47  | 
list(bisect_multi_bytes(missing_content, 0, ['foo', 'bar'])))  | 
|
48  | 
self.assertEqual([[(0, 'foo'), (0, 'bar')]], calls)  | 
|
49  | 
||
50  | 
def test_lookup_missing_key_before_all_others(self):  | 
|
51  | 
calls = []  | 
|
52  | 
def missing_first_content(location_keys):  | 
|
53  | 
            # returns -1 for all keys unless the byte offset is 0 when it
 | 
|
54  | 
            # returns False
 | 
|
55  | 
calls.append(location_keys)  | 
|
56  | 
result = []  | 
|
57  | 
for location_key in location_keys:  | 
|
58  | 
if location_key[0] == 0:  | 
|
59  | 
result.append((location_key, False))  | 
|
60  | 
else:  | 
|
61  | 
result.append((location_key, -1))  | 
|
62  | 
return result  | 
|
63  | 
        # given a 0 length file, this should terminate with one call.
 | 
|
64  | 
self.assertEqual([],  | 
|
65  | 
list(bisect_multi_bytes(missing_first_content, 0, ['foo', 'bar'])))  | 
|
66  | 
self.assertEqual([[(0, 'foo'), (0, 'bar')]], calls)  | 
|
67  | 
del calls[:]  | 
|
68  | 
        # given a 2 length file, this should make two calls - 1, 0.
 | 
|
69  | 
self.assertEqual([],  | 
|
70  | 
list(bisect_multi_bytes(missing_first_content, 2, ['foo', 'bar'])))  | 
|
71  | 
self.assertEqual([  | 
|
72  | 
[(1, 'foo'), (1, 'bar')],  | 
|
73  | 
[(0, 'foo'), (0, 'bar')],  | 
|
74  | 
], calls)  | 
|
75  | 
del calls[:]  | 
|
76  | 
        # given a really long file - 200MB, this should make a series of calls with the
 | 
|
77  | 
        # gap between adjactent calls dropping by 50% each time. We choose a
 | 
|
78  | 
        # length which just under a power of two to generate a corner case in
 | 
|
79  | 
        # bisection - naively using power of two reduction in size can lead to
 | 
|
80  | 
        # a very long tail in the bisection process. The current users of
 | 
|
81  | 
        # the bisect_multi_bytes api are not expected to be concerned by this,
 | 
|
82  | 
        # as the delta gets down to 4K (the minimum we expect to read and
 | 
|
83  | 
        # parse) within 16 steps even on a 200MB index (which at 4 keys/K is
 | 
|
84  | 
        # 800 thousand keys, and log2 of 800000 is 19 - so we're doing log2
 | 
|
85  | 
        # steps in the worst case there.
 | 
|
86  | 
self.assertEqual([],  | 
|
87  | 
list(bisect_multi_bytes(  | 
|
88  | 
missing_first_content,268435456 - 1 , ['foo', 'bar'])))  | 
|
89  | 
self.assertEqual([  | 
|
90  | 
[(134217727, 'foo'), (134217727, 'bar')],  | 
|
91  | 
[(67108864, 'foo'), (67108864, 'bar')],  | 
|
92  | 
[(33554433, 'foo'), (33554433, 'bar')],  | 
|
93  | 
[(16777218, 'foo'), (16777218, 'bar')],  | 
|
94  | 
[(8388611, 'foo'), (8388611, 'bar')],  | 
|
95  | 
[(4194308, 'foo'), (4194308, 'bar')],  | 
|
96  | 
[(2097157, 'foo'), (2097157, 'bar')],  | 
|
97  | 
[(1048582, 'foo'), (1048582, 'bar')],  | 
|
98  | 
[(524295, 'foo'), (524295, 'bar')],  | 
|
99  | 
[(262152, 'foo'), (262152, 'bar')],  | 
|
100  | 
[(131081, 'foo'), (131081, 'bar')],  | 
|
101  | 
[(65546, 'foo'), (65546, 'bar')],  | 
|
102  | 
[(32779, 'foo'), (32779, 'bar')],  | 
|
103  | 
[(16396, 'foo'), (16396, 'bar')],  | 
|
104  | 
[(8205, 'foo'), (8205, 'bar')],  | 
|
105  | 
[(4110, 'foo'), (4110, 'bar')],  | 
|
106  | 
[(2063, 'foo'), (2063, 'bar')],  | 
|
107  | 
[(1040, 'foo'), (1040, 'bar')],  | 
|
108  | 
[(529, 'foo'), (529, 'bar')],  | 
|
109  | 
[(274, 'foo'), (274, 'bar')],  | 
|
110  | 
[(147, 'foo'), (147, 'bar')],  | 
|
111  | 
[(84, 'foo'), (84, 'bar')],  | 
|
112  | 
[(53, 'foo'), (53, 'bar')],  | 
|
113  | 
[(38, 'foo'), (38, 'bar')],  | 
|
114  | 
[(31, 'foo'), (31, 'bar')],  | 
|
115  | 
[(28, 'foo'), (28, 'bar')],  | 
|
116  | 
[(27, 'foo'), (27, 'bar')],  | 
|
117  | 
[(26, 'foo'), (26, 'bar')],  | 
|
118  | 
[(25, 'foo'), (25, 'bar')],  | 
|
119  | 
[(24, 'foo'), (24, 'bar')],  | 
|
120  | 
[(23, 'foo'), (23, 'bar')],  | 
|
121  | 
[(22, 'foo'), (22, 'bar')],  | 
|
122  | 
[(21, 'foo'), (21, 'bar')],  | 
|
123  | 
[(20, 'foo'), (20, 'bar')],  | 
|
124  | 
[(19, 'foo'), (19, 'bar')],  | 
|
125  | 
[(18, 'foo'), (18, 'bar')],  | 
|
126  | 
[(17, 'foo'), (17, 'bar')],  | 
|
127  | 
[(16, 'foo'), (16, 'bar')],  | 
|
128  | 
[(15, 'foo'), (15, 'bar')],  | 
|
129  | 
[(14, 'foo'), (14, 'bar')],  | 
|
130  | 
[(13, 'foo'), (13, 'bar')],  | 
|
131  | 
[(12, 'foo'), (12, 'bar')],  | 
|
132  | 
[(11, 'foo'), (11, 'bar')],  | 
|
133  | 
[(10, 'foo'), (10, 'bar')],  | 
|
134  | 
[(9, 'foo'), (9, 'bar')],  | 
|
135  | 
[(8, 'foo'), (8, 'bar')],  | 
|
136  | 
[(7, 'foo'), (7, 'bar')],  | 
|
137  | 
[(6, 'foo'), (6, 'bar')],  | 
|
138  | 
[(5, 'foo'), (5, 'bar')],  | 
|
139  | 
[(4, 'foo'), (4, 'bar')],  | 
|
140  | 
[(3, 'foo'), (3, 'bar')],  | 
|
141  | 
[(2, 'foo'), (2, 'bar')],  | 
|
142  | 
[(1, 'foo'), (1, 'bar')],  | 
|
143  | 
[(0, 'foo'), (0, 'bar')],  | 
|
144  | 
], calls)  | 
|
145  | 
||
146  | 
def test_lookup_missing_key_after_all_others(self):  | 
|
147  | 
calls = []  | 
|
148  | 
end = None  | 
|
149  | 
def missing_last_content(location_keys):  | 
|
150  | 
            # returns +1 for all keys unless the byte offset is 'end' when it
 | 
|
151  | 
            # returns False
 | 
|
152  | 
calls.append(location_keys)  | 
|
153  | 
result = []  | 
|
154  | 
for location_key in location_keys:  | 
|
155  | 
if location_key[0] == end:  | 
|
156  | 
result.append((location_key, False))  | 
|
157  | 
else:  | 
|
158  | 
result.append((location_key, +1))  | 
|
159  | 
return result  | 
|
160  | 
        # given a 0 length file, this should terminate with one call.
 | 
|
161  | 
end = 0  | 
|
162  | 
self.assertEqual([],  | 
|
163  | 
list(bisect_multi_bytes(missing_last_content, 0, ['foo', 'bar'])))  | 
|
164  | 
self.assertEqual([[(0, 'foo'), (0, 'bar')]], calls)  | 
|
165  | 
del calls[:]  | 
|
166  | 
end = 2  | 
|
167  | 
        # given a 3 length file, this should make two calls - 1, 2.
 | 
|
168  | 
self.assertEqual([],  | 
|
169  | 
list(bisect_multi_bytes(missing_last_content, 3, ['foo', 'bar'])))  | 
|
170  | 
self.assertEqual([  | 
|
171  | 
[(1, 'foo'), (1, 'bar')],  | 
|
172  | 
[(2, 'foo'), (2, 'bar')],  | 
|
173  | 
], calls)  | 
|
174  | 
del calls[:]  | 
|
175  | 
end = 268435456 - 2  | 
|
176  | 
        # see the really-big lookup series in
 | 
|
177  | 
        # test_lookup_missing_key_before_all_others for details about this
 | 
|
178  | 
        # assertion.
 | 
|
179  | 
self.assertEqual([],  | 
|
180  | 
list(bisect_multi_bytes(  | 
|
181  | 
missing_last_content,268435456 - 1 , ['foo', 'bar'])))  | 
|
182  | 
self.assertEqual([  | 
|
183  | 
[(134217727, 'foo'), (134217727, 'bar')],  | 
|
184  | 
[(201326590, 'foo'), (201326590, 'bar')],  | 
|
185  | 
[(234881021, 'foo'), (234881021, 'bar')],  | 
|
186  | 
[(251658236, 'foo'), (251658236, 'bar')],  | 
|
187  | 
[(260046843, 'foo'), (260046843, 'bar')],  | 
|
188  | 
[(264241146, 'foo'), (264241146, 'bar')],  | 
|
189  | 
[(266338297, 'foo'), (266338297, 'bar')],  | 
|
190  | 
[(267386872, 'foo'), (267386872, 'bar')],  | 
|
191  | 
[(267911159, 'foo'), (267911159, 'bar')],  | 
|
192  | 
[(268173302, 'foo'), (268173302, 'bar')],  | 
|
193  | 
[(268304373, 'foo'), (268304373, 'bar')],  | 
|
194  | 
[(268369908, 'foo'), (268369908, 'bar')],  | 
|
195  | 
[(268402675, 'foo'), (268402675, 'bar')],  | 
|
196  | 
[(268419058, 'foo'), (268419058, 'bar')],  | 
|
197  | 
[(268427249, 'foo'), (268427249, 'bar')],  | 
|
198  | 
[(268431344, 'foo'), (268431344, 'bar')],  | 
|
199  | 
[(268433391, 'foo'), (268433391, 'bar')],  | 
|
200  | 
[(268434414, 'foo'), (268434414, 'bar')],  | 
|
201  | 
[(268434925, 'foo'), (268434925, 'bar')],  | 
|
202  | 
[(268435180, 'foo'), (268435180, 'bar')],  | 
|
203  | 
[(268435307, 'foo'), (268435307, 'bar')],  | 
|
204  | 
[(268435370, 'foo'), (268435370, 'bar')],  | 
|
205  | 
[(268435401, 'foo'), (268435401, 'bar')],  | 
|
206  | 
[(268435416, 'foo'), (268435416, 'bar')],  | 
|
207  | 
[(268435423, 'foo'), (268435423, 'bar')],  | 
|
208  | 
[(268435426, 'foo'), (268435426, 'bar')],  | 
|
209  | 
[(268435427, 'foo'), (268435427, 'bar')],  | 
|
210  | 
[(268435428, 'foo'), (268435428, 'bar')],  | 
|
211  | 
[(268435429, 'foo'), (268435429, 'bar')],  | 
|
212  | 
[(268435430, 'foo'), (268435430, 'bar')],  | 
|
213  | 
[(268435431, 'foo'), (268435431, 'bar')],  | 
|
214  | 
[(268435432, 'foo'), (268435432, 'bar')],  | 
|
215  | 
[(268435433, 'foo'), (268435433, 'bar')],  | 
|
216  | 
[(268435434, 'foo'), (268435434, 'bar')],  | 
|
217  | 
[(268435435, 'foo'), (268435435, 'bar')],  | 
|
218  | 
[(268435436, 'foo'), (268435436, 'bar')],  | 
|
219  | 
[(268435437, 'foo'), (268435437, 'bar')],  | 
|
220  | 
[(268435438, 'foo'), (268435438, 'bar')],  | 
|
221  | 
[(268435439, 'foo'), (268435439, 'bar')],  | 
|
222  | 
[(268435440, 'foo'), (268435440, 'bar')],  | 
|
223  | 
[(268435441, 'foo'), (268435441, 'bar')],  | 
|
224  | 
[(268435442, 'foo'), (268435442, 'bar')],  | 
|
225  | 
[(268435443, 'foo'), (268435443, 'bar')],  | 
|
226  | 
[(268435444, 'foo'), (268435444, 'bar')],  | 
|
227  | 
[(268435445, 'foo'), (268435445, 'bar')],  | 
|
228  | 
[(268435446, 'foo'), (268435446, 'bar')],  | 
|
229  | 
[(268435447, 'foo'), (268435447, 'bar')],  | 
|
230  | 
[(268435448, 'foo'), (268435448, 'bar')],  | 
|
231  | 
[(268435449, 'foo'), (268435449, 'bar')],  | 
|
232  | 
[(268435450, 'foo'), (268435450, 'bar')],  | 
|
233  | 
[(268435451, 'foo'), (268435451, 'bar')],  | 
|
234  | 
[(268435452, 'foo'), (268435452, 'bar')],  | 
|
235  | 
[(268435453, 'foo'), (268435453, 'bar')],  | 
|
236  | 
[(268435454, 'foo'), (268435454, 'bar')]  | 
|
237  | 
], calls)  | 
|
238  | 
||
239  | 
def test_lookup_when_a_key_is_missing_continues(self):  | 
|
240  | 
calls = []  | 
|
241  | 
def missing_foo_otherwise_missing_first_content(location_keys):  | 
|
242  | 
            # returns -1 for all keys unless the byte offset is 0 when it
 | 
|
243  | 
            # returns False
 | 
|
244  | 
calls.append(location_keys)  | 
|
245  | 
result = []  | 
|
246  | 
for location_key in location_keys:  | 
|
247  | 
if location_key[1] == 'foo' or location_key[0] == 0:  | 
|
248  | 
result.append((location_key, False))  | 
|
249  | 
else:  | 
|
250  | 
result.append((location_key, -1))  | 
|
251  | 
return result  | 
|
252  | 
        # given a 2 length file, this should terminate with two calls, one for
 | 
|
253  | 
        # both keys, and one for bar only.
 | 
|
254  | 
self.assertEqual([],  | 
|
255  | 
list(bisect_multi_bytes(  | 
|
256  | 
missing_foo_otherwise_missing_first_content, 2,  | 
|
257  | 
['foo', 'bar'])))  | 
|
258  | 
self.assertEqual([  | 
|
259  | 
[(1, 'foo'), (1, 'bar')],  | 
|
260  | 
[(0, 'bar')],  | 
|
261  | 
], calls)  | 
|
262  | 
||
263  | 
def test_found_keys_returned_other_searches_continue(self):  | 
|
264  | 
calls = []  | 
|
265  | 
def find_bar_at_1_foo_missing_at_0(location_keys):  | 
|
266  | 
calls.append(location_keys)  | 
|
267  | 
result = []  | 
|
268  | 
for location_key in location_keys:  | 
|
269  | 
if location_key == (1, 'bar'):  | 
|
270  | 
result.append((location_key, 'bar-result'))  | 
|
271  | 
elif location_key[0] == 0:  | 
|
272  | 
result.append((location_key, False))  | 
|
273  | 
else:  | 
|
274  | 
result.append((location_key, -1))  | 
|
275  | 
return result  | 
|
276  | 
        # given a 4 length file, this should terminate with three calls, two for
 | 
|
277  | 
        # both keys, and one for foo only.
 | 
|
278  | 
self.assertEqual([('bar', 'bar-result')],  | 
|
279  | 
list(bisect_multi_bytes(  | 
|
280  | 
find_bar_at_1_foo_missing_at_0, 4,  | 
|
281  | 
['foo', 'bar'])))  | 
|
282  | 
self.assertEqual([  | 
|
283  | 
[(2, 'foo'), (2, 'bar')],  | 
|
284  | 
[(1, 'foo'), (1, 'bar')],  | 
|
285  | 
[(0, 'foo')],  | 
|
286  | 
], calls)  | 
|
287  | 
||
288  | 
def test_searches_different_keys_in_different_directions(self):  | 
|
289  | 
calls = []  | 
|
290  | 
def missing_bar_at_1_foo_at_3(location_keys):  | 
|
291  | 
calls.append(location_keys)  | 
|
292  | 
result = []  | 
|
293  | 
for location_key in location_keys:  | 
|
294  | 
if location_key[1] == 'bar':  | 
|
295  | 
if location_key[0] == 1:  | 
|
296  | 
result.append((location_key, False))  | 
|
297  | 
else:  | 
|
298  | 
                        # search down
 | 
|
299  | 
result.append((location_key, -1))  | 
|
300  | 
elif location_key[1] == 'foo':  | 
|
301  | 
if location_key[0] == 3:  | 
|
302  | 
result.append((location_key, False))  | 
|
303  | 
else:  | 
|
304  | 
                        # search up
 | 
|
305  | 
result.append((location_key, +1))  | 
|
306  | 
return result  | 
|
307  | 
        # given a 4 length file, this should terminate with two calls.
 | 
|
308  | 
self.assertEqual([],  | 
|
309  | 
list(bisect_multi_bytes(  | 
|
310  | 
missing_bar_at_1_foo_at_3, 4,  | 
|
311  | 
['foo', 'bar'])))  | 
|
312  | 
self.assertEqual([  | 
|
313  | 
[(2, 'foo'), (2, 'bar')],  | 
|
314  | 
[(3, 'foo'), (1, 'bar')],  | 
|
315  | 
], calls)  | 
|
316  | 
||
317  | 
def test_change_direction_in_single_key_search(self):  | 
|
318  | 
        # check that we can search down, up, down again - 
 | 
|
319  | 
        # so length 8, goes 4, 6, 5
 | 
|
320  | 
calls = []  | 
|
321  | 
def missing_at_5(location_keys):  | 
|
322  | 
calls.append(location_keys)  | 
|
323  | 
result = []  | 
|
324  | 
for location_key in location_keys:  | 
|
325  | 
if location_key[0] == 5:  | 
|
326  | 
result.append((location_key, False))  | 
|
327  | 
elif location_key[0] > 5:  | 
|
328  | 
                    # search down
 | 
|
329  | 
result.append((location_key, -1))  | 
|
330  | 
else:  | 
|
331  | 
                    # search up
 | 
|
332  | 
result.append((location_key, +1))  | 
|
333  | 
return result  | 
|
334  | 
        # given a 8 length file, this should terminate with three calls.
 | 
|
335  | 
self.assertEqual([],  | 
|
336  | 
list(bisect_multi_bytes(  | 
|
337  | 
missing_at_5, 8,  | 
|
338  | 
['foo', 'bar'])))  | 
|
339  | 
self.assertEqual([  | 
|
340  | 
[(4, 'foo'), (4, 'bar')],  | 
|
341  | 
[(6, 'foo'), (6, 'bar')],  | 
|
342  | 
[(5, 'foo'), (5, 'bar')],  | 
|
343  | 
], calls)  | 
|
344  |