/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar


Viewing changes to breezy/tests/test_knit.py

  • Committer: Jelmer Vernooij
  • Date: 2018-11-23 01:35:56 UTC
  • mto: (7211.10.3 git-empty-dirs)
  • mto: This revision was merged to the branch mainline in revision 7215.
  • Revision ID: jelmer@jelmer.uk-20181123013556-mu7ct9ovl7fozjc2
Update comment about ssl.

 
# Copyright (C) 2006-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for Knit data structure"""

import gzip
from io import BytesIO
import sys

from .. import (
    errors,
    multiparent,
    osutils,
    tests,
    transport,
    )
from ..bzr import (
    knit,
    pack,
    )
from ..bzr.index import *
from ..bzr.knit import (
    AnnotatedKnitContent,
    KnitContent,
    KnitCorrupt,
    KnitDataStreamIncompatible,
    KnitDataStreamUnknown,
    KnitHeaderError,
    KnitIndexUnknownMethod,
    KnitVersionedFiles,
    PlainKnitContent,
    _VFContentMapGenerator,
    _KndxIndex,
    _KnitGraphIndex,
    _KnitKeyAccess,
    make_file_factory,
    )
from ..patiencediff import PatienceSequenceMatcher
from ..bzr import (
    knitpack_repo,
    pack_repo,
    )
from . import (
    TestCase,
    TestCaseWithMemoryTransport,
    TestCaseWithTransport,
    TestNotApplicable,
    )
from ..bzr.versionedfile import (
    AbsentContentFactory,
    ConstantMapper,
    network_bytes_to_kind_and_offset,
    RecordingVersionedFilesDecorator,
    )
from . import (
    features,
    )


compiled_knit_feature = features.ModuleAvailableFeature(
    'breezy.bzr._knit_load_data_pyx')


class ErrorTests(TestCase):

    def test_knit_data_stream_incompatible(self):
        error = KnitDataStreamIncompatible(
            'stream format', 'target format')
        self.assertEqual('Cannot insert knit data stream of format '
                         '"stream format" into knit of format '
                         '"target format".', str(error))

    def test_knit_data_stream_unknown(self):
        error = KnitDataStreamUnknown(
            'stream format')
        self.assertEqual('Cannot parse knit data stream of format '
                         '"stream format".', str(error))

    def test_knit_header_error(self):
        error = KnitHeaderError('line foo\n', 'path/to/file')
        self.assertEqual("Knit header error: 'line foo\\n' unexpected"
                         " for file \"path/to/file\".", str(error))

    def test_knit_index_unknown_method(self):
        error = KnitIndexUnknownMethod('http://host/foo.kndx',
                                       ['bad', 'no-eol'])
        self.assertEqual("Knit index http://host/foo.kndx does not have a"
                         " known method in options: ['bad', 'no-eol']",
                         str(error))


class KnitContentTestsMixin(object):

    def test_constructor(self):
        content = self._make_content([])

    def test_text(self):
        content = self._make_content([])
        self.assertEqual(content.text(), [])

        content = self._make_content(
            [(b"origin1", b"text1"), (b"origin2", b"text2")])
        self.assertEqual(content.text(), [b"text1", b"text2"])

    def test_copy(self):
        content = self._make_content(
            [(b"origin1", b"text1"), (b"origin2", b"text2")])
        copy = content.copy()
        self.assertIsInstance(copy, content.__class__)
        self.assertEqual(copy.annotate(), content.annotate())

    def assertDerivedBlocksEqual(self, source, target, noeol=False):
        """Assert that the derived matching blocks match real output"""
        source_lines = source.splitlines(True)
        target_lines = target.splitlines(True)

        def nl(line):
            if noeol and not line.endswith('\n'):
                return line + '\n'
            else:
                return line
        source_content = self._make_content(
            [(None, nl(l)) for l in source_lines])
        target_content = self._make_content(
            [(None, nl(l)) for l in target_lines])
        line_delta = source_content.line_delta(target_content)
        delta_blocks = list(KnitContent.get_line_delta_blocks(line_delta,
                                                              source_lines, target_lines))
        matcher = PatienceSequenceMatcher(None, source_lines, target_lines)
        matcher_blocks = list(matcher.get_matching_blocks())
        self.assertEqual(matcher_blocks, delta_blocks)

    def test_get_line_delta_blocks(self):
        self.assertDerivedBlocksEqual('a\nb\nc\n', 'q\nc\n')
        self.assertDerivedBlocksEqual(TEXT_1, TEXT_1)
        self.assertDerivedBlocksEqual(TEXT_1, TEXT_1A)
        self.assertDerivedBlocksEqual(TEXT_1, TEXT_1B)
        self.assertDerivedBlocksEqual(TEXT_1B, TEXT_1A)
        self.assertDerivedBlocksEqual(TEXT_1A, TEXT_1B)
        self.assertDerivedBlocksEqual(TEXT_1A, '')
        self.assertDerivedBlocksEqual('', TEXT_1A)
        self.assertDerivedBlocksEqual('', '')
        self.assertDerivedBlocksEqual('a\nb\nc', 'a\nb\nc\nd')

    def test_get_line_delta_blocks_noeol(self):
        """Handle historical knit deltas safely

        Some existing knit deltas don't consider the last line to differ
        when the only difference is whether it has a final newline.

        New knit deltas appear to always consider the last line to differ
        in this case.
        """
        self.assertDerivedBlocksEqual('a\nb\nc', 'a\nb\nc\nd\n', noeol=True)
        self.assertDerivedBlocksEqual('a\nb\nc\nd\n', 'a\nb\nc', noeol=True)
        self.assertDerivedBlocksEqual('a\nb\nc\n', 'a\nb\nc', noeol=True)
        self.assertDerivedBlocksEqual('a\nb\nc', 'a\nb\nc\n', noeol=True)
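
# (For reference: PatienceSequenceMatcher implements the difflib matcher
# interface, so get_matching_blocks() returns (i, j, n) triples meaning
# a[i:i+n] == b[j:j+n], terminated by (len(a), len(b), 0). For example,
# matching ['a\n', 'b\n'] against ['a\n', 'c\n', 'b\n'] gives
# [(0, 0, 1), (1, 2, 1), (2, 3, 0)].)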
 

TEXT_1 = """\
Banana cup cakes:

- bananas
- eggs
- broken tea cups
"""

TEXT_1A = """\
Banana cup cake recipe
(serves 6)

- bananas
- eggs
- broken tea cups
- self-raising flour
"""

TEXT_1B = """\
Banana cup cake recipe

- bananas (do not use plantains!!!)
- broken tea cups
- flour
"""

delta_1_1a = """\
0,1,2
Banana cup cake recipe
(serves 6)
5,5,1
- self-raising flour
"""
 

TEXT_2 = """\
Boeuf bourguignon

- beef
- red wine
- small onions
- carrot
- mushrooms
"""


class TestPlainKnitContent(TestCase, KnitContentTestsMixin):

    def _make_content(self, lines):
        annotated_content = AnnotatedKnitContent(lines)
        return PlainKnitContent(annotated_content.text(), 'bogus')

    def test_annotate(self):
        content = self._make_content([])
        self.assertEqual(content.annotate(), [])

        content = self._make_content(
            [("origin1", "text1"), ("origin2", "text2")])
        self.assertEqual(content.annotate(),
                         [("bogus", "text1"), ("bogus", "text2")])

    def test_line_delta(self):
        content1 = self._make_content([("", "a"), ("", "b")])
        content2 = self._make_content([("", "a"), ("", "a"), ("", "c")])
        self.assertEqual(content1.line_delta(content2),
                         [(1, 2, 2, ["a", "c"])])

    def test_line_delta_iter(self):
        content1 = self._make_content([("", "a"), ("", "b")])
        content2 = self._make_content([("", "a"), ("", "a"), ("", "c")])
        it = content1.line_delta_iter(content2)
        self.assertEqual(next(it), (1, 2, 2, ["a", "c"]))
        self.assertRaises(StopIteration, next, it)


class TestAnnotatedKnitContent(TestCase, KnitContentTestsMixin):

    def _make_content(self, lines):
        return AnnotatedKnitContent(lines)

    def test_annotate(self):
        content = self._make_content([])
        self.assertEqual(content.annotate(), [])

        content = self._make_content(
            [(b"origin1", b"text1"), (b"origin2", b"text2")])
        self.assertEqual(content.annotate(),
                         [(b"origin1", b"text1"), (b"origin2", b"text2")])

    def test_line_delta(self):
        content1 = self._make_content([("", "a"), ("", "b")])
        content2 = self._make_content([("", "a"), ("", "a"), ("", "c")])
        self.assertEqual(content1.line_delta(content2),
                         [(1, 2, 2, [("", "a"), ("", "c")])])

    def test_line_delta_iter(self):
        content1 = self._make_content([("", "a"), ("", "b")])
        content2 = self._make_content([("", "a"), ("", "a"), ("", "c")])
        it = content1.line_delta_iter(content2)
        self.assertEqual(next(it), (1, 2, 2, [("", "a"), ("", "c")]))
        self.assertRaises(StopIteration, next, it)


class MockTransport(object):

    def __init__(self, file_lines=None):
        self.file_lines = file_lines
        self.calls = []
        # We have no base directory for the MockTransport
        self.base = ''

    def get(self, filename):
        if self.file_lines is None:
            raise errors.NoSuchFile(filename)
        else:
            return BytesIO(b"\n".join(self.file_lines))

    def readv(self, relpath, offsets):
        fp = self.get(relpath)
        for offset, size in offsets:
            fp.seek(offset)
            yield offset, fp.read(size)

    def __getattr__(self, name):
        def queue_call(*args, **kwargs):
            self.calls.append((name, args, kwargs))
        return queue_call


class MockReadvFailingTransport(MockTransport):
    """Fail in the middle of a readv() result.

    This Transport will successfully yield the first two requested hunks, but
    raise NoSuchFile for the rest.
    """

    def readv(self, relpath, offsets):
        count = 0
        for result in MockTransport.readv(self, relpath, offsets):
            count += 1
            # we use 2 because the first offset is the pack header, the second
 
313
            # is the first actual content requset
 
314
            if count > 2:
 
315
                raise errors.NoSuchFile(relpath)
 
316
            yield result
 
317
 
 
318
 
 
319
class KnitRecordAccessTestsMixin(object):
 
320
    """Tests for getting and putting knit records."""
 
321
 
 
322
    def test_add_raw_records(self):
 
323
        """Add_raw_records adds records retrievable later."""
 
324
        access = self.get_access()
 
325
        memos = access.add_raw_records([(b'key', 10)], b'1234567890')
 
326
        self.assertEqual([b'1234567890'], list(access.get_raw_records(memos)))
 
327
 
 
328
    def test_add_several_raw_records(self):
 
329
        """add_raw_records with many records and read some back."""
 
330
        access = self.get_access()
 
331
        memos = access.add_raw_records([(b'key', 10), (b'key2', 2), (b'key3', 5)],
 
332
                                       b'12345678901234567')
 
333
        self.assertEqual([b'1234567890', b'12', b'34567'],
 
334
                         list(access.get_raw_records(memos)))
 
335
        self.assertEqual([b'1234567890'],
 
336
                         list(access.get_raw_records(memos[0:1])))
 
337
        self.assertEqual([b'12'],
 
338
                         list(access.get_raw_records(memos[1:2])))
 
339
        self.assertEqual([b'34567'],
 
340
                         list(access.get_raw_records(memos[2:3])))
 
341
        self.assertEqual([b'1234567890', b'34567'],
 
342
                         list(access.get_raw_records(memos[0:1] + memos[2:3])))
 
343
 
 
344
 
 
345
class TestKnitKnitAccess(TestCaseWithMemoryTransport, KnitRecordAccessTestsMixin):
 
346
    """Tests for the .kndx implementation."""
 
347
 
 
348
    def get_access(self):
 
349
        """Get a .knit style access instance."""
 
350
        mapper = ConstantMapper("foo")
 
351
        access = _KnitKeyAccess(self.get_transport(), mapper)
 
352
        return access
 
353
 
 
354
 
 
355
class _TestException(Exception):
 
356
    """Just an exception for local tests to use."""
 
357
 
 
358
 
 
359
class TestPackKnitAccess(TestCaseWithMemoryTransport, KnitRecordAccessTestsMixin):
 
360
    """Tests for the pack based access."""
 
361
 
 
362
    def get_access(self):
 
363
        return self._get_access()[0]
 
364
 
 
365
    def _get_access(self, packname='packfile', index='FOO'):
 
366
        transport = self.get_transport()
 
367
 
 
368
        def write_data(bytes):
 
369
            transport.append_bytes(packname, bytes)
 
370
        writer = pack.ContainerWriter(write_data)
 
371
        writer.begin()
 
372
        access = pack_repo._DirectPackAccess({})
 
373
        access.set_writer(writer, index, (transport, packname))
 
374
        return access, writer
 
375
 
 
376
    def make_pack_file(self):
 
377
        """Create a pack file with 2 records."""
 
378
        access, writer = self._get_access(packname='packname', index='foo')
 
379
        memos = []
 
380
        memos.extend(access.add_raw_records([(b'key1', 10)], b'1234567890'))
 
381
        memos.extend(access.add_raw_records([(b'key2', 5)], b'12345'))
 
382
        writer.end()
 
383
        return memos
 
384
 
 
385
    def test_pack_collection_pack_retries(self):
 
386
        """An explicit pack of a pack collection succeeds even when a
 
387
        concurrent pack happens.
 
388
        """
 
389
        builder = self.make_branch_builder('.')
 
390
        builder.start_series()
 
391
        builder.build_snapshot(None, [
 
392
            ('add', ('', b'root-id', 'directory', None)),
 
393
            ('add', ('file', b'file-id', 'file', b'content\nrev 1\n')),
 
394
            ], revision_id=b'rev-1')
 
395
        builder.build_snapshot([b'rev-1'], [
 
396
            ('modify', ('file', b'content\nrev 2\n')),
 
397
            ], revision_id=b'rev-2')
 
398
        builder.build_snapshot([b'rev-2'], [
 
399
            ('modify', ('file', b'content\nrev 3\n')),
 
400
            ], revision_id=b'rev-3')
 
401
        self.addCleanup(builder.finish_series)
 
402
        b = builder.get_branch()
 
403
        self.addCleanup(b.lock_write().unlock)
 
404
        repo = b.repository
 
405
        collection = repo._pack_collection
 
406
        # Concurrently repack the repo.
 
407
        reopened_repo = repo.controldir.open_repository()
 
408
        reopened_repo.pack()
 
409
        # Pack the new pack.
 
410
        collection.pack()
 
411
 
 
412
    def make_vf_for_retrying(self):
 
413
        """Create 3 packs and a reload function.
 
414
 
 
415
        Originally, 2 pack files will have the data, but one will be missing.
 
416
        And then the third will be used in place of the first two if reload()
 
417
        is called.
 
418
 
 
419
        :return: (versioned_file, reload_counter)
 
420
            versioned_file  a KnitVersionedFiles using the packs for access
 
421
        """
 
422
        builder = self.make_branch_builder('.', format="1.9")
 
423
        builder.start_series()
 
424
        builder.build_snapshot(None, [
 
425
            ('add', ('', b'root-id', 'directory', None)),
 
426
            ('add', ('file', b'file-id', 'file', b'content\nrev 1\n')),
 
427
            ], revision_id=b'rev-1')
 
428
        builder.build_snapshot([b'rev-1'], [
 
429
            ('modify', ('file', b'content\nrev 2\n')),
 
430
            ], revision_id=b'rev-2')
 
431
        builder.build_snapshot([b'rev-2'], [
 
432
            ('modify', ('file', b'content\nrev 3\n')),
 
433
            ], revision_id=b'rev-3')
 
434
        builder.finish_series()
 
435
        b = builder.get_branch()
 
436
        b.lock_write()
 
437
        self.addCleanup(b.unlock)
 
438
        # Pack these three revisions into another pack file, but don't remove
 
439
        # the originals
 
440
        repo = b.repository
 
441
        collection = repo._pack_collection
 
442
        collection.ensure_loaded()
 
443
        orig_packs = collection.packs
 
444
        packer = knitpack_repo.KnitPacker(collection, orig_packs, '.testpack')
 
445
        new_pack = packer.pack()
 
446
        # forget about the new pack
 
447
        collection.reset()
 
448
        repo.refresh_data()
 
449
        vf = repo.revisions
 
450
        # Set up a reload() function that switches to using the new pack file
 
451
        new_index = new_pack.revision_index
 
452
        access_tuple = new_pack.access_tuple()
 
453
        reload_counter = [0, 0, 0]
 
454
 
 
455
        def reload():
 
456
            reload_counter[0] += 1
 
457
            if reload_counter[1] > 0:
 
458
                # We already reloaded, nothing more to do
 
459
                reload_counter[2] += 1
 
460
                return False
 
461
            reload_counter[1] += 1
 
462
            vf._index._graph_index._indices[:] = [new_index]
 
463
            vf._access._indices.clear()
 
464
            vf._access._indices[new_index] = access_tuple
 
465
            return True
 
466
        # Delete one of the pack files so the data will need to be reloaded. We
 
467
        # will delete the file with 'rev-2' in it
 
468
        trans, name = orig_packs[1].access_tuple()
 
469
        trans.delete(name)
 
470
        # We don't have the index trigger reloading because we want to test
 
471
        # that we reload when the .pack disappears
 
472
        vf._access._reload_func = reload
 
473
        return vf, reload_counter
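
    # An annotation inferred from the reload() closure above: reload_counter
    # tracks [total reload() calls, reloads that actually swapped in the new
    # pack, calls that found nothing left to do], which is why the tests
    # below expect values like [1, 1, 0] and [2, 1, 1].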
 

    def make_reload_func(self, return_val=True):
        reload_called = [0]

        def reload():
            reload_called[0] += 1
            return return_val
        return reload_called, reload

    def make_retry_exception(self):
        # We raise a real exception so that sys.exc_info() is properly
        # populated
        try:
            raise _TestException('foobar')
        except _TestException as e:
            retry_exc = errors.RetryWithNewPacks(None, reload_occurred=False,
                                                 exc_info=sys.exc_info())
        # GZ 2010-08-10: Cycle with exc_info affects 3 tests
        return retry_exc

    def test_read_from_several_packs(self):
        access, writer = self._get_access()
        memos = []
        memos.extend(access.add_raw_records([(b'key', 10)], b'1234567890'))
        writer.end()
        access, writer = self._get_access('pack2', 'FOOBAR')
        memos.extend(access.add_raw_records([(b'key', 5)], b'12345'))
        writer.end()
        access, writer = self._get_access('pack3', 'BAZ')
        memos.extend(access.add_raw_records([(b'key', 5)], b'alpha'))
        writer.end()
        transport = self.get_transport()
        access = pack_repo._DirectPackAccess({"FOO": (transport, 'packfile'),
                                              "FOOBAR": (transport, 'pack2'),
                                              "BAZ": (transport, 'pack3')})
        self.assertEqual([b'1234567890', b'12345', b'alpha'],
                         list(access.get_raw_records(memos)))
        self.assertEqual([b'1234567890'],
                         list(access.get_raw_records(memos[0:1])))
        self.assertEqual([b'12345'],
                         list(access.get_raw_records(memos[1:2])))
        self.assertEqual([b'alpha'],
                         list(access.get_raw_records(memos[2:3])))
        self.assertEqual([b'1234567890', b'alpha'],
                         list(access.get_raw_records(memos[0:1] + memos[2:3])))

    def test_set_writer(self):
        """The writer should be settable post construction."""
        access = pack_repo._DirectPackAccess({})
        transport = self.get_transport()
        packname = 'packfile'
        index = 'foo'

        def write_data(bytes):
            transport.append_bytes(packname, bytes)
        writer = pack.ContainerWriter(write_data)
        writer.begin()
        access.set_writer(writer, index, (transport, packname))
        memos = access.add_raw_records([(b'key', 10)], b'1234567890')
        writer.end()
        self.assertEqual([b'1234567890'], list(access.get_raw_records(memos)))

    def test_missing_index_raises_retry(self):
        memos = self.make_pack_file()
        transport = self.get_transport()
        reload_called, reload_func = self.make_reload_func()
        # Note that the index key has changed from 'foo' to 'bar'
        access = pack_repo._DirectPackAccess({'bar': (transport, 'packname')},
                                             reload_func=reload_func)
        e = self.assertListRaises(errors.RetryWithNewPacks,
                                  access.get_raw_records, memos)
        # Because a key was passed in which does not match our index list, we
        # assume that the listing was already reloaded
        self.assertTrue(e.reload_occurred)
        self.assertIsInstance(e.exc_info, tuple)
        self.assertIs(e.exc_info[0], KeyError)
        self.assertIsInstance(e.exc_info[1], KeyError)

    def test_missing_index_raises_key_error_with_no_reload(self):
        memos = self.make_pack_file()
        transport = self.get_transport()
        # Note that the index key has changed from 'foo' to 'bar'
        access = pack_repo._DirectPackAccess({'bar': (transport, 'packname')})
        e = self.assertListRaises(KeyError, access.get_raw_records, memos)

    def test_missing_file_raises_retry(self):
        memos = self.make_pack_file()
        transport = self.get_transport()
        reload_called, reload_func = self.make_reload_func()
        # Note that the 'filename' has been changed to 'different-packname'
        access = pack_repo._DirectPackAccess(
            {'foo': (transport, 'different-packname')},
            reload_func=reload_func)
        e = self.assertListRaises(errors.RetryWithNewPacks,
                                  access.get_raw_records, memos)
        # The file has gone missing, so we assume we need to reload
        self.assertFalse(e.reload_occurred)
        self.assertIsInstance(e.exc_info, tuple)
        self.assertIs(e.exc_info[0], errors.NoSuchFile)
        self.assertIsInstance(e.exc_info[1], errors.NoSuchFile)
        self.assertEqual('different-packname', e.exc_info[1].path)

    def test_missing_file_raises_no_such_file_with_no_reload(self):
        memos = self.make_pack_file()
        transport = self.get_transport()
        # Note that the 'filename' has been changed to 'different-packname'
        access = pack_repo._DirectPackAccess(
            {'foo': (transport, 'different-packname')})
        e = self.assertListRaises(errors.NoSuchFile,
                                  access.get_raw_records, memos)

    def test_failing_readv_raises_retry(self):
        memos = self.make_pack_file()
        transport = self.get_transport()
        failing_transport = MockReadvFailingTransport(
            [transport.get_bytes('packname')])
        reload_called, reload_func = self.make_reload_func()
        access = pack_repo._DirectPackAccess(
            {'foo': (failing_transport, 'packname')},
            reload_func=reload_func)
        # Asking for a single record will not trigger the Mock failure
        self.assertEqual([b'1234567890'],
                         list(access.get_raw_records(memos[:1])))
        self.assertEqual([b'12345'],
                         list(access.get_raw_records(memos[1:2])))
        # A multiple offset readv() will fail mid-way through
        e = self.assertListRaises(errors.RetryWithNewPacks,
                                  access.get_raw_records, memos)
        # The file has gone missing, so we assume we need to reload
        self.assertFalse(e.reload_occurred)
        self.assertIsInstance(e.exc_info, tuple)
        self.assertIs(e.exc_info[0], errors.NoSuchFile)
        self.assertIsInstance(e.exc_info[1], errors.NoSuchFile)
        self.assertEqual('packname', e.exc_info[1].path)

    def test_failing_readv_raises_no_such_file_with_no_reload(self):
        memos = self.make_pack_file()
        transport = self.get_transport()
        failing_transport = MockReadvFailingTransport(
            [transport.get_bytes('packname')])
        reload_called, reload_func = self.make_reload_func()
        access = pack_repo._DirectPackAccess(
            {'foo': (failing_transport, 'packname')})
        # Asking for a single record will not trigger the Mock failure
        self.assertEqual([b'1234567890'],
                         list(access.get_raw_records(memos[:1])))
        self.assertEqual([b'12345'],
                         list(access.get_raw_records(memos[1:2])))
        # A multiple offset readv() will fail mid-way through
        e = self.assertListRaises(errors.NoSuchFile,
                                  access.get_raw_records, memos)

    def test_reload_or_raise_no_reload(self):
        access = pack_repo._DirectPackAccess({}, reload_func=None)
        retry_exc = self.make_retry_exception()
        # Without a reload_func, we will just re-raise the original exception
        self.assertRaises(_TestException, access.reload_or_raise, retry_exc)

    def test_reload_or_raise_reload_changed(self):
        reload_called, reload_func = self.make_reload_func(return_val=True)
        access = pack_repo._DirectPackAccess({}, reload_func=reload_func)
        retry_exc = self.make_retry_exception()
        access.reload_or_raise(retry_exc)
        self.assertEqual([1], reload_called)
        retry_exc.reload_occurred = True
        access.reload_or_raise(retry_exc)
        self.assertEqual([2], reload_called)

    def test_reload_or_raise_reload_no_change(self):
        reload_called, reload_func = self.make_reload_func(return_val=False)
        access = pack_repo._DirectPackAccess({}, reload_func=reload_func)
        retry_exc = self.make_retry_exception()
        # If reload_occurred is False, then we consider it an error to have
        # reload_func() return False (no changes).
        self.assertRaises(_TestException, access.reload_or_raise, retry_exc)
        self.assertEqual([1], reload_called)
        retry_exc.reload_occurred = True
        # If reload_occurred is True, then we assume nothing changed because
        # it had changed earlier, but didn't change again
        access.reload_or_raise(retry_exc)
        self.assertEqual([2], reload_called)

    def test_annotate_retries(self):
        vf, reload_counter = self.make_vf_for_retrying()
        # It is a little bit bogus to annotate the Revision VF, but it works,
        # as we have ancestry stored there
        key = (b'rev-3',)
        reload_lines = vf.annotate(key)
        self.assertEqual([1, 1, 0], reload_counter)
        plain_lines = vf.annotate(key)
        self.assertEqual([1, 1, 0], reload_counter)  # No extra reloading
        if reload_lines != plain_lines:
            self.fail('Annotation was not identical with reloading.')
        # Now delete the packs-in-use, which should trigger another reload, but
        # this time we just raise an exception because we can't recover
        for trans, name in vf._access._indices.values():
            trans.delete(name)
        self.assertRaises(errors.NoSuchFile, vf.annotate, key)
        self.assertEqual([2, 1, 1], reload_counter)

    def test__get_record_map_retries(self):
        vf, reload_counter = self.make_vf_for_retrying()
        keys = [(b'rev-1',), (b'rev-2',), (b'rev-3',)]
        records = vf._get_record_map(keys)
        self.assertEqual(keys, sorted(records.keys()))
        self.assertEqual([1, 1, 0], reload_counter)
        # Now delete the packs-in-use, which should trigger another reload, but
        # this time we just raise an exception because we can't recover
        for trans, name in vf._access._indices.values():
            trans.delete(name)
        self.assertRaises(errors.NoSuchFile, vf._get_record_map, keys)
        self.assertEqual([2, 1, 1], reload_counter)

    def test_get_record_stream_retries(self):
        vf, reload_counter = self.make_vf_for_retrying()
        keys = [(b'rev-1',), (b'rev-2',), (b'rev-3',)]
        record_stream = vf.get_record_stream(keys, 'topological', False)
        record = next(record_stream)
        self.assertEqual((b'rev-1',), record.key)
        self.assertEqual([0, 0, 0], reload_counter)
        record = next(record_stream)
        self.assertEqual((b'rev-2',), record.key)
        self.assertEqual([1, 1, 0], reload_counter)
        record = next(record_stream)
        self.assertEqual((b'rev-3',), record.key)
        self.assertEqual([1, 1, 0], reload_counter)
        # Now delete all pack files, and see that we raise the right error
        for trans, name in vf._access._indices.values():
            trans.delete(name)
        self.assertListRaises(errors.NoSuchFile,
                              vf.get_record_stream, keys, 'topological', False)

    def test_iter_lines_added_or_present_in_keys_retries(self):
        vf, reload_counter = self.make_vf_for_retrying()
        keys = [(b'rev-1',), (b'rev-2',), (b'rev-3',)]
        # Unfortunately, iter_lines_added_or_present_in_keys iterates the
        # result in random order (determined by the iteration order from a
        # set()), so we don't have any solid way to trigger whether data is
        # read before or after. However we tried to delete the middle node to
        # exercise the code well.
        # What we care about is that all lines are always yielded, but not
        # duplicated
        count = 0
        reload_lines = sorted(vf.iter_lines_added_or_present_in_keys(keys))
        self.assertEqual([1, 1, 0], reload_counter)
        # Now do it again, to make sure the result is equivalent
        plain_lines = sorted(vf.iter_lines_added_or_present_in_keys(keys))
        self.assertEqual([1, 1, 0], reload_counter)  # No extra reloading
        self.assertEqual(plain_lines, reload_lines)
        self.assertEqual(21, len(plain_lines))
        # Now delete all pack files, and see that we raise the right error
        for trans, name in vf._access._indices.values():
            trans.delete(name)
        self.assertListRaises(errors.NoSuchFile,
                              vf.iter_lines_added_or_present_in_keys, keys)
        self.assertEqual([2, 1, 1], reload_counter)

    def test_get_record_stream_yields_disk_sorted_order(self):
        # if we get 'unordered' pick a semi-optimal order for reading. The
        # order should be grouped by pack file, and then by position in file
        repo = self.make_repository('test', format='pack-0.92')
        repo.lock_write()
        self.addCleanup(repo.unlock)
        repo.start_write_group()
        vf = repo.texts
        vf.add_lines((b'f-id', b'rev-5'), [(b'f-id', b'rev-4')], [b'lines\n'])
        vf.add_lines((b'f-id', b'rev-1'), [], [b'lines\n'])
        vf.add_lines((b'f-id', b'rev-2'), [(b'f-id', b'rev-1')], [b'lines\n'])
        repo.commit_write_group()
        # We inserted them as rev-5, rev-1, rev-2, we should get them back in
        # the same order
        stream = vf.get_record_stream([(b'f-id', b'rev-1'), (b'f-id', b'rev-5'),
                                       (b'f-id', b'rev-2')], 'unordered', False)
        keys = [r.key for r in stream]
        self.assertEqual([(b'f-id', b'rev-5'), (b'f-id', b'rev-1'),
                          (b'f-id', b'rev-2')], keys)
        repo.start_write_group()
        vf.add_lines((b'f-id', b'rev-4'), [(b'f-id', b'rev-3')], [b'lines\n'])
        vf.add_lines((b'f-id', b'rev-3'), [(b'f-id', b'rev-2')], [b'lines\n'])
        vf.add_lines((b'f-id', b'rev-6'), [(b'f-id', b'rev-5')], [b'lines\n'])
        repo.commit_write_group()
        # Request in random order, to make sure the output order isn't based on
        # the request
        request_keys = set((b'f-id', b'rev-%d' % i) for i in range(1, 7))
        stream = vf.get_record_stream(request_keys, 'unordered', False)
        keys = [r.key for r in stream]
        # We want to get the keys back in disk order, but it doesn't matter
        # which pack we read from first. So this can come back in 2 orders
        alt1 = [(b'f-id', b'rev-%d' % i) for i in [4, 3, 6, 5, 1, 2]]
        alt2 = [(b'f-id', b'rev-%d' % i) for i in [5, 1, 2, 4, 3, 6]]
        if keys != alt1 and keys != alt2:
            self.fail('Returned key order did not match either expected order.'
                      ' expected %s or %s, not %s'
                      % (alt1, alt2, keys))


class LowLevelKnitDataTests(TestCase):

    def create_gz_content(self, text):
        sio = BytesIO()
        with gzip.GzipFile(mode='wb', fileobj=sio) as gz_file:
            gz_file.write(text)
        return sio.getvalue()

    def make_multiple_records(self):
        """Create the content for multiple records."""
        sha1sum = osutils.sha_string(b'foo\nbar\n')
        total_txt = []
        gz_txt = self.create_gz_content(b'version rev-id-1 2 %s\n'
                                        b'foo\n'
                                        b'bar\n'
                                        b'end rev-id-1\n'
                                        % (sha1sum,))
        record_1 = (0, len(gz_txt), sha1sum)
        total_txt.append(gz_txt)
        sha1sum = osutils.sha_string(b'baz\n')
        gz_txt = self.create_gz_content(b'version rev-id-2 1 %s\n'
                                        b'baz\n'
                                        b'end rev-id-2\n'
                                        % (sha1sum,))
        record_2 = (record_1[1], len(gz_txt), sha1sum)
        total_txt.append(gz_txt)
        return total_txt, record_1, record_2
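
    # The raw record layout built above: each knit record is a gzipped chunk
    # of the form "version <rev-id> <line-count> <sha1>\n" + content lines +
    # "end <rev-id>\n", and records are concatenated back to back, which is
    # why record_2 starts at record_1's length.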
 

    def test_valid_knit_data(self):
        sha1sum = osutils.sha_string(b'foo\nbar\n')
        gz_txt = self.create_gz_content(b'version rev-id-1 2 %s\n'
                                        b'foo\n'
                                        b'bar\n'
                                        b'end rev-id-1\n'
                                        % (sha1sum,))
        transport = MockTransport([gz_txt])
        access = _KnitKeyAccess(transport, ConstantMapper('filename'))
        knit = KnitVersionedFiles(None, access)
        records = [((b'rev-id-1',), ((b'rev-id-1',), 0, len(gz_txt)))]

        contents = list(knit._read_records_iter(records))
        self.assertEqual([((b'rev-id-1',), [b'foo\n', b'bar\n'],
                           b'4e48e2c9a3d2ca8a708cb0cc545700544efb5021')], contents)

        raw_contents = list(knit._read_records_iter_raw(records))
        self.assertEqual([((b'rev-id-1',), gz_txt, sha1sum)], raw_contents)

    def test_multiple_records_valid(self):
        total_txt, record_1, record_2 = self.make_multiple_records()
        transport = MockTransport([b''.join(total_txt)])
        access = _KnitKeyAccess(transport, ConstantMapper('filename'))
        knit = KnitVersionedFiles(None, access)
        records = [((b'rev-id-1',), ((b'rev-id-1',), record_1[0], record_1[1])),
                   ((b'rev-id-2',), ((b'rev-id-2',), record_2[0], record_2[1]))]

        contents = list(knit._read_records_iter(records))
        self.assertEqual([((b'rev-id-1',), [b'foo\n', b'bar\n'], record_1[2]),
                          ((b'rev-id-2',), [b'baz\n'], record_2[2])],
                         contents)

        raw_contents = list(knit._read_records_iter_raw(records))
        self.assertEqual([((b'rev-id-1',), total_txt[0], record_1[2]),
                          ((b'rev-id-2',), total_txt[1], record_2[2])],
                         raw_contents)

    def test_not_enough_lines(self):
        sha1sum = osutils.sha_string(b'foo\n')
        # record says 2 lines data says 1
        gz_txt = self.create_gz_content(b'version rev-id-1 2 %s\n'
                                        b'foo\n'
                                        b'end rev-id-1\n'
                                        % (sha1sum,))
        transport = MockTransport([gz_txt])
        access = _KnitKeyAccess(transport, ConstantMapper('filename'))
        knit = KnitVersionedFiles(None, access)
        records = [((b'rev-id-1',), ((b'rev-id-1',), 0, len(gz_txt)))]
        self.assertRaises(KnitCorrupt, list,
                          knit._read_records_iter(records))

        # read_records_iter_raw won't detect that sort of mismatch/corruption
        raw_contents = list(knit._read_records_iter_raw(records))
        self.assertEqual([((b'rev-id-1',), gz_txt, sha1sum)], raw_contents)

    def test_too_many_lines(self):
        sha1sum = osutils.sha_string(b'foo\nbar\n')
        # record says 1 lines data says 2
        gz_txt = self.create_gz_content(b'version rev-id-1 1 %s\n'
                                        b'foo\n'
                                        b'bar\n'
                                        b'end rev-id-1\n'
                                        % (sha1sum,))
        transport = MockTransport([gz_txt])
        access = _KnitKeyAccess(transport, ConstantMapper('filename'))
        knit = KnitVersionedFiles(None, access)
        records = [((b'rev-id-1',), ((b'rev-id-1',), 0, len(gz_txt)))]
        self.assertRaises(KnitCorrupt, list,
                          knit._read_records_iter(records))

        # read_records_iter_raw won't detect that sort of mismatch/corruption
        raw_contents = list(knit._read_records_iter_raw(records))
        self.assertEqual([((b'rev-id-1',), gz_txt, sha1sum)], raw_contents)

    def test_mismatched_version_id(self):
        sha1sum = osutils.sha_string(b'foo\nbar\n')
        gz_txt = self.create_gz_content(b'version rev-id-1 2 %s\n'
                                        b'foo\n'
                                        b'bar\n'
                                        b'end rev-id-1\n'
                                        % (sha1sum,))
        transport = MockTransport([gz_txt])
        access = _KnitKeyAccess(transport, ConstantMapper('filename'))
        knit = KnitVersionedFiles(None, access)
        # We are asking for rev-id-2, but the data is rev-id-1
        records = [((b'rev-id-2',), ((b'rev-id-2',), 0, len(gz_txt)))]
        self.assertRaises(KnitCorrupt, list,
                          knit._read_records_iter(records))

        # read_records_iter_raw detects mismatches in the header
        self.assertRaises(KnitCorrupt, list,
                          knit._read_records_iter_raw(records))

    def test_uncompressed_data(self):
        sha1sum = osutils.sha_string(b'foo\nbar\n')
        txt = (b'version rev-id-1 2 %s\n'
               b'foo\n'
               b'bar\n'
               b'end rev-id-1\n'
               % (sha1sum,))
        transport = MockTransport([txt])
        access = _KnitKeyAccess(transport, ConstantMapper('filename'))
        knit = KnitVersionedFiles(None, access)
        records = [((b'rev-id-1',), ((b'rev-id-1',), 0, len(txt)))]

        # We don't have valid gzip data ==> corrupt
        self.assertRaises(KnitCorrupt, list,
                          knit._read_records_iter(records))

        # read_records_iter_raw will notice the bad data
        self.assertRaises(KnitCorrupt, list,
                          knit._read_records_iter_raw(records))

    def test_corrupted_data(self):
        sha1sum = osutils.sha_string(b'foo\nbar\n')
        gz_txt = self.create_gz_content(b'version rev-id-1 2 %s\n'
                                        b'foo\n'
                                        b'bar\n'
                                        b'end rev-id-1\n'
                                        % (sha1sum,))
        # Change 2 bytes in the middle to \xff
        gz_txt = gz_txt[:10] + b'\xff\xff' + gz_txt[12:]
        transport = MockTransport([gz_txt])
        access = _KnitKeyAccess(transport, ConstantMapper('filename'))
        knit = KnitVersionedFiles(None, access)
        records = [((b'rev-id-1',), ((b'rev-id-1',), 0, len(gz_txt)))]
        self.assertRaises(KnitCorrupt, list,
                          knit._read_records_iter(records))
        # read_records_iter_raw will barf on bad gz data
        self.assertRaises(KnitCorrupt, list,
                          knit._read_records_iter_raw(records))


class LowLevelKnitIndexTests(TestCase):

    @property
    def _load_data(self):
        from ..bzr._knit_load_data_py import _load_data_py
        return _load_data_py

    def get_knit_index(self, transport, name, mode):
        mapper = ConstantMapper(name)
        self.overrideAttr(knit, '_load_data', self._load_data)

        def allow_writes():
            return 'w' in mode
        return _KndxIndex(transport, mapper, lambda: None, allow_writes, lambda: True)
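
    # Going by how they are used here, the three callables passed to
    # _KndxIndex are a get_scope function, an allow_writes predicate and an
    # is_locked check; these stubs make the index always look locked, with
    # writability following the requested mode.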
 

    def test_create_file(self):
        transport = MockTransport()
        index = self.get_knit_index(transport, "filename", "w")
        index.keys()
        call = transport.calls.pop(0)
        # call[1][1] is a BytesIO - we can't test it by simple equality.
        self.assertEqual('put_file_non_atomic', call[0])
        self.assertEqual('filename.kndx', call[1][0])
        # With no history, _KndxIndex writes a new index:
        self.assertEqual(_KndxIndex.HEADER,
                         call[1][1].getvalue())
        self.assertEqual({'create_parent_dir': True}, call[2])

    def test_read_utf8_version_id(self):
        unicode_revision_id = u"version-\N{CYRILLIC CAPITAL LETTER A}"
        utf8_revision_id = unicode_revision_id.encode('utf-8')
        transport = MockTransport([
            _KndxIndex.HEADER,
            b'%s option 0 1 :' % (utf8_revision_id,)
            ])
        index = self.get_knit_index(transport, "filename", "r")
        # _KndxIndex is a private class, and deals in utf8 revision_ids, not
        # Unicode revision_ids.
        self.assertEqual({(utf8_revision_id,): ()},
                         index.get_parent_map(index.keys()))
        self.assertFalse((unicode_revision_id,) in index.keys())

    def test_read_utf8_parents(self):
        unicode_revision_id = u"version-\N{CYRILLIC CAPITAL LETTER A}"
        utf8_revision_id = unicode_revision_id.encode('utf-8')
        transport = MockTransport([
            _KndxIndex.HEADER,
            b"version option 0 1 .%s :" % (utf8_revision_id,)
            ])
        index = self.get_knit_index(transport, "filename", "r")
        self.assertEqual({(b"version",): ((utf8_revision_id,),)},
                         index.get_parent_map(index.keys()))

    def test_read_ignore_corrupted_lines(self):
        transport = MockTransport([
            _KndxIndex.HEADER,
            b"corrupted",
            b"corrupted options 0 1 .b .c ",
            b"version options 0 1 :"
            ])
        index = self.get_knit_index(transport, "filename", "r")
        self.assertEqual(1, len(index.keys()))
        self.assertEqual({(b"version",)}, index.keys())

    def test_read_corrupted_header(self):
        transport = MockTransport([b'not a bzr knit index header\n'])
        index = self.get_knit_index(transport, "filename", "r")
        self.assertRaises(KnitHeaderError, index.keys)

    def test_read_duplicate_entries(self):
        transport = MockTransport([
            _KndxIndex.HEADER,
            b"parent options 0 1 :",
            b"version options1 0 1 0 :",
            b"version options2 1 2 .other :",
            b"version options3 3 4 0 .other :"
            ])
        index = self.get_knit_index(transport, "filename", "r")
        self.assertEqual(2, len(index.keys()))
        # check that the index used is the last one written. (Specific
        # to KnitIndex style indices.)
        self.assertEqual(b"1", index._dictionary_compress([(b"version",)]))
        self.assertEqual(((b"version",), 3, 4),
                         index.get_position((b"version",)))
        self.assertEqual([b"options3"], index.get_options((b"version",)))
        self.assertEqual({(b"version",): ((b"parent",), (b"other",))},
                         index.get_parent_map([(b"version",)]))

    def test_read_compressed_parents(self):
        transport = MockTransport([
            _KndxIndex.HEADER,
            b"a option 0 1 :",
            b"b option 0 1 0 :",
            b"c option 0 1 1 0 :",
            ])
        index = self.get_knit_index(transport, "filename", "r")
        self.assertEqual({(b"b",): ((b"a",),), (b"c",): ((b"b",), (b"a",))},
                         index.get_parent_map([(b"b",), (b"c",)]))
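
    # For orientation: the .kndx entry format exercised by these tests is
    # "<name> <options> <start> <size> <parents> :", where a bare integer
    # parent appears to refer to the position of an earlier entry and
    # ".<name>" names a parent revision directly.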
 

    def test_write_utf8_version_id(self):
        unicode_revision_id = u"version-\N{CYRILLIC CAPITAL LETTER A}"
        utf8_revision_id = unicode_revision_id.encode('utf-8')
        transport = MockTransport([
            _KndxIndex.HEADER
            ])
        index = self.get_knit_index(transport, "filename", "r")
        index.add_records([
            ((utf8_revision_id,), [b"option"], ((utf8_revision_id,), 0, 1), [])])
        call = transport.calls.pop(0)
        # call[1][1] is a BytesIO - we can't test it by simple equality.
        self.assertEqual('put_file_non_atomic', call[0])
        self.assertEqual('filename.kndx', call[1][0])
        # With no history, _KndxIndex writes a new index:
        self.assertEqual(_KndxIndex.HEADER +
                         b"\n%s option 0 1  :" % (utf8_revision_id,),
                         call[1][1].getvalue())
        self.assertEqual({'create_parent_dir': True}, call[2])

    def test_write_utf8_parents(self):
        unicode_revision_id = u"version-\N{CYRILLIC CAPITAL LETTER A}"
        utf8_revision_id = unicode_revision_id.encode('utf-8')
        transport = MockTransport([
            _KndxIndex.HEADER
            ])
        index = self.get_knit_index(transport, "filename", "r")
        index.add_records([
            ((b"version",), [b"option"], ((b"version",), 0, 1), [(utf8_revision_id,)])])
        call = transport.calls.pop(0)
        # call[1][1] is a BytesIO - we can't test it by simple equality.
        self.assertEqual('put_file_non_atomic', call[0])
        self.assertEqual('filename.kndx', call[1][0])
        # With no history, _KndxIndex writes a new index:
        self.assertEqual(_KndxIndex.HEADER +
                         b"\nversion option 0 1 .%s :" % (utf8_revision_id,),
                         call[1][1].getvalue())
        self.assertEqual({'create_parent_dir': True}, call[2])

    def test_keys(self):
        transport = MockTransport([
            _KndxIndex.HEADER
            ])
        index = self.get_knit_index(transport, "filename", "r")

        self.assertEqual(set(), index.keys())

        index.add_records([((b"a",), [b"option"], ((b"a",), 0, 1), [])])
        self.assertEqual({(b"a",)}, index.keys())

        index.add_records([((b"a",), [b"option"], ((b"a",), 0, 1), [])])
        self.assertEqual({(b"a",)}, index.keys())

        index.add_records([((b"b",), [b"option"], ((b"b",), 0, 1), [])])
        self.assertEqual({(b"a",), (b"b",)}, index.keys())

    def add_a_b(self, index, random_id=None):
        kwargs = {}
        if random_id is not None:
            kwargs["random_id"] = random_id
        index.add_records([
            ((b"a",), [b"option"], ((b"a",), 0, 1), [(b"b",)]),
            ((b"a",), [b"opt"], ((b"a",), 1, 2), [(b"c",)]),
            ((b"b",), [b"option"], ((b"b",), 2, 3), [(b"a",)])
            ], **kwargs)

    def assertIndexIsAB(self, index):
        self.assertEqual({
            (b'a',): ((b'c',),),
            (b'b',): ((b'a',),),
            },
            index.get_parent_map(index.keys()))
        self.assertEqual(((b"a",), 1, 2), index.get_position((b"a",)))
        self.assertEqual(((b"b",), 2, 3), index.get_position((b"b",)))
        self.assertEqual([b"opt"], index.get_options((b"a",)))

    def test_add_versions(self):
        transport = MockTransport([
            _KndxIndex.HEADER
            ])
        index = self.get_knit_index(transport, "filename", "r")

        self.add_a_b(index)
        call = transport.calls.pop(0)
        # call[1][1] is a BytesIO - we can't test it by simple equality.
        self.assertEqual('put_file_non_atomic', call[0])
        self.assertEqual('filename.kndx', call[1][0])
        # With no history, _KndxIndex writes a new index:
        self.assertEqual(
            _KndxIndex.HEADER +
            b"\na option 0 1 .b :"
            b"\na opt 1 2 .c :"
            b"\nb option 2 3 0 :",
            call[1][1].getvalue())
        self.assertEqual({'create_parent_dir': True}, call[2])
        self.assertIndexIsAB(index)

    def test_add_versions_random_id_is_accepted(self):
        transport = MockTransport([
            _KndxIndex.HEADER
            ])
        index = self.get_knit_index(transport, "filename", "r")
        self.add_a_b(index, random_id=True)

    def test_delay_create_and_add_versions(self):
        transport = MockTransport()

        index = self.get_knit_index(transport, "filename", "w")
        # dir_mode=0777)
        self.assertEqual([], transport.calls)
        self.add_a_b(index)
        # self.assertEqual(
        # [    {"dir_mode": 0777, "create_parent_dir": True, "mode": "wb"},
        #    kwargs)
        # Two calls: one during which we load the existing index (and when its
 
1144
        # missing create it), then a second where we write the contents out.
 
1145
        self.assertEqual(2, len(transport.calls))
 
1146
        call = transport.calls.pop(0)
 
1147
        self.assertEqual('put_file_non_atomic', call[0])
 
1148
        self.assertEqual('filename.kndx', call[1][0])
 
1149
        # With no history, _KndxIndex writes a new index:
 
1150
        self.assertEqual(_KndxIndex.HEADER, call[1][1].getvalue())
 
1151
        self.assertEqual({'create_parent_dir': True}, call[2])
 
1152
        call = transport.calls.pop(0)
 
1153
        # call[1][1] is a BytesIO - we can't test it by simple equality.
 
1154
        self.assertEqual('put_file_non_atomic', call[0])
 
1155
        self.assertEqual('filename.kndx', call[1][0])
 
1156
        # With no history, _KndxIndex writes a new index:
 
1157
        self.assertEqual(
 
1158
            _KndxIndex.HEADER +
 
1159
            b"\na option 0 1 .b :"
 
1160
            b"\na opt 1 2 .c :"
 
1161
            b"\nb option 2 3 0 :",
 
1162
            call[1][1].getvalue())
 
1163
        self.assertEqual({'create_parent_dir': True}, call[2])
 
1164
 
 
1165
    def assertTotalBuildSize(self, size, keys, positions):
 
1166
        self.assertEqual(size,
 
1167
                         knit._get_total_build_size(None, keys, positions))
 
1168
 
 
1169
    def test__get_total_build_size(self):
 
1170
        positions = {
 
1171
            (b'a',): (('fulltext', False), ((b'a',), 0, 100), None),
 
1172
            (b'b',): (('line-delta', False), ((b'b',), 100, 21), (b'a',)),
 
1173
            (b'c',): (('line-delta', False), ((b'c',), 121, 35), (b'b',)),
 
1174
            (b'd',): (('line-delta', False), ((b'd',), 156, 12), (b'b',)),
 
1175
            }
 
1176
        self.assertTotalBuildSize(100, [(b'a',)], positions)
 
1177
        self.assertTotalBuildSize(121, [(b'b',)], positions)
 
1178
        # c needs both a & b
 
1179
        self.assertTotalBuildSize(156, [(b'c',)], positions)
 
1180
        # we shouldn't count 'b' twice
 
1181
        self.assertTotalBuildSize(156, [(b'b',), (b'c',)], positions)
 
1182
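        # d is delta-compressed against b, so building d reads a (100) +
        # b (21) + d (12) = 133 bytes; c's 35 bytes are never fetched.
        # Fetching c and d together reuses the shared a+b chain: 156 + 12.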
        self.assertTotalBuildSize(133, [(b'd',)], positions)
        self.assertTotalBuildSize(168, [(b'c',), (b'd',)], positions)

    def test_get_position(self):
        transport = MockTransport([
            _KndxIndex.HEADER,
            b"a option 0 1 :",
            b"b option 1 2 :"
            ])
        index = self.get_knit_index(transport, "filename", "r")

        self.assertEqual(((b"a",), 0, 1), index.get_position((b"a",)))
        self.assertEqual(((b"b",), 1, 2), index.get_position((b"b",)))

    def test_get_method(self):
        transport = MockTransport([
            _KndxIndex.HEADER,
            b"a fulltext,unknown 0 1 :",
            b"b unknown,line-delta 1 2 :",
            b"c bad 3 4 :"
            ])
        index = self.get_knit_index(transport, "filename", "r")

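        # get_method() is derived from the options column: unrecognised
        # extra options are tolerated, but an entry carrying neither
        # "fulltext" nor "line-delta" cannot be reconstructed.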
        self.assertEqual("fulltext", index.get_method(b"a"))
 
1206
        self.assertEqual("line-delta", index.get_method(b"b"))
 
1207
        self.assertRaises(knit.KnitIndexUnknownMethod, index.get_method, b"c")
 
1208
 
 
1209
    def test_get_options(self):
 
1210
        transport = MockTransport([
 
1211
            _KndxIndex.HEADER,
 
1212
            b"a opt1 0 1 :",
 
1213
            b"b opt2,opt3 1 2 :"
 
1214
            ])
 
1215
        index = self.get_knit_index(transport, "filename", "r")
 
1216
 
 
1217
        self.assertEqual([b"opt1"], index.get_options(b"a"))
 
1218
        self.assertEqual([b"opt2", b"opt3"], index.get_options(b"b"))
 
1219
 
 
1220
    def test_get_parent_map(self):
 
1221
        transport = MockTransport([
 
1222
            _KndxIndex.HEADER,
 
1223
            b"a option 0 1 :",
 
1224
            b"b option 1 2 0 .c :",
 
1225
            b"c option 1 2 1 0 .e :"
 
1226
            ])
 
1227
        index = self.get_knit_index(transport, "filename", "r")
 
1228
 
 
1229
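        # In a .kndx parent list a bare integer refers back to an earlier
        # record by position (0 is "a", 1 is "b"), while ".name" spells the
        # parent out directly - hence the expected map below: b has parents
        # (a, c) and c has (b, a, e).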
        self.assertEqual({
            (b"a",): (),
            (b"b",): ((b"a",), (b"c",)),
            (b"c",): ((b"b",), (b"a",), (b"e",)),
            }, index.get_parent_map(index.keys()))

    def test_impossible_parent(self):
        """Test we get KnitCorrupt if the parent couldn't possibly exist."""
        transport = MockTransport([
            _KndxIndex.HEADER,
            b"a option 0 1 :",
            b"b option 0 1 4 :"  # We don't have a 4th record
            ])
        index = self.get_knit_index(transport, 'filename', 'r')
        self.assertRaises(KnitCorrupt, index.keys)

    def test_corrupted_parent(self):
        transport = MockTransport([
            _KndxIndex.HEADER,
            b"a option 0 1 :",
            b"b option 0 1 :",
            b"c option 0 1 1v :",  # Can't have a parent of '1v'
            ])
        index = self.get_knit_index(transport, 'filename', 'r')
        self.assertRaises(KnitCorrupt, index.keys)

    def test_corrupted_parent_in_list(self):
        transport = MockTransport([
            _KndxIndex.HEADER,
            b"a option 0 1 :",
            b"b option 0 1 :",
            b"c option 0 1 1 v :",  # Can't have a parent of 'v'
            ])
        index = self.get_knit_index(transport, 'filename', 'r')
        self.assertRaises(KnitCorrupt, index.keys)

    def test_invalid_position(self):
        transport = MockTransport([
            _KndxIndex.HEADER,
            b"a option 1v 1 :",
            ])
        index = self.get_knit_index(transport, 'filename', 'r')
        self.assertRaises(KnitCorrupt, index.keys)

    def test_invalid_size(self):
        transport = MockTransport([
            _KndxIndex.HEADER,
            b"a option 1 1v :",
            ])
        index = self.get_knit_index(transport, 'filename', 'r')
        self.assertRaises(KnitCorrupt, index.keys)

    def test_scan_unvalidated_index_not_implemented(self):
        transport = MockTransport()
        index = self.get_knit_index(transport, 'filename', 'r')
        self.assertRaises(
            NotImplementedError, index.scan_unvalidated_index,
            'dummy graph_index')
        self.assertRaises(
            NotImplementedError, index.get_missing_compression_parents)

    def test_short_line(self):
        transport = MockTransport([
            _KndxIndex.HEADER,
            b"a option 0 10  :",
            b"b option 10 10 0",  # This line isn't terminated, ignored
            ])
        index = self.get_knit_index(transport, "filename", "r")
        self.assertEqual({(b'a',)}, index.keys())

    def test_skip_incomplete_record(self):
        # A line with bogus data should just be skipped
        transport = MockTransport([
            _KndxIndex.HEADER,
            b"a option 0 10  :",
            b"b option 10 10 0",  # This line isn't terminated, ignored
            b"c option 20 10 0 :",  # Properly terminated, and starts with '\n'
            ])
        index = self.get_knit_index(transport, "filename", "r")
        self.assertEqual({(b'a',), (b'c',)}, index.keys())

    def test_trailing_characters(self):
        # A line with bogus data should just be skipped
        transport = MockTransport([
            _KndxIndex.HEADER,
            b"a option 0 10  :",
            b"b option 10 10 0 :a",  # This line has extra trailing characters
            b"c option 20 10 0 :",  # Properly terminated, and starts with '\n'
            ])
        index = self.get_knit_index(transport, "filename", "r")
        self.assertEqual({(b'a',), (b'c',)}, index.keys())


class LowLevelKnitIndexTests_c(LowLevelKnitIndexTests):

    _test_needs_features = [compiled_knit_feature]

    @property
    def _load_data(self):
        from ..bzr._knit_load_data_pyx import _load_data_c
        return _load_data_c


class Test_KnitAnnotator(TestCaseWithMemoryTransport):

    def make_annotator(self):
        factory = knit.make_pack_factory(True, True, 1)
        vf = factory(self.get_transport())
        return knit._KnitAnnotator(vf)

    def test__expand_fulltext(self):
        ann = self.make_annotator()
        rev_key = (b'rev-id',)
        ann._num_compression_children[rev_key] = 1
        res = ann._expand_record(rev_key, ((b'parent-id',),), None,
                                 [b'line1\n', b'line2\n'], ('fulltext', True))
        # The content object and text lines should be cached appropriately
        self.assertEqual([b'line1\n', b'line2'], res)
        content_obj = ann._content_objects[rev_key]
        self.assertEqual([b'line1\n', b'line2\n'], content_obj._lines)
        self.assertEqual(res, content_obj.text())
        self.assertEqual(res, ann._text_cache[rev_key])

    def test__expand_delta_comp_parent_not_available(self):
        # Parent isn't available yet, so we return nothing, but queue up this
        # node for later processing
        ann = self.make_annotator()
        rev_key = (b'rev-id',)
        parent_key = (b'parent-id',)
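        # A knit line-delta is a sequence of "start,end,count" headers, each
        # followed by count replacement lines; "0,1,1" replaces parent line
        # 0 with the single line "new-line\n".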
        record = [b'0,1,1\n', b'new-line\n']
        details = ('line-delta', False)
        res = ann._expand_record(rev_key, (parent_key,), parent_key,
                                 record, details)
        self.assertEqual(None, res)
        self.assertTrue(parent_key in ann._pending_deltas)
        pending = ann._pending_deltas[parent_key]
        self.assertEqual(1, len(pending))
        self.assertEqual((rev_key, (parent_key,), record, details), pending[0])

    def test__expand_record_tracks_num_children(self):
        ann = self.make_annotator()
        rev_key = (b'rev-id',)
        rev2_key = (b'rev2-id',)
        parent_key = (b'parent-id',)
        record = [b'0,1,1\n', b'new-line\n']
        details = ('line-delta', False)
        ann._num_compression_children[parent_key] = 2
        ann._expand_record(parent_key, (), None, [b'line1\n', b'line2\n'],
                           ('fulltext', False))
        res = ann._expand_record(rev_key, (parent_key,), parent_key,
                                 record, details)
        self.assertEqual({parent_key: 1}, ann._num_compression_children)
        # Expanding the second child should remove the content object, and the
        # num_compression_children entry
        res = ann._expand_record(rev2_key, (parent_key,), parent_key,
                                 record, details)
        self.assertFalse(parent_key in ann._content_objects)
        self.assertEqual({}, ann._num_compression_children)
        # We should not cache the content_objects for rev2 and rev, because
        # they do not have compression children of their own.
        self.assertEqual({}, ann._content_objects)

    def test__expand_delta_records_blocks(self):
        ann = self.make_annotator()
        rev_key = (b'rev-id',)
        parent_key = (b'parent-id',)
        record = [b'0,1,1\n', b'new-line\n']
        details = ('line-delta', True)
        ann._num_compression_children[parent_key] = 2
        ann._expand_record(parent_key, (), None,
                           [b'line1\n', b'line2\n', b'line3\n'],
                           ('fulltext', False))
        ann._expand_record(rev_key, (parent_key,), parent_key, record, details)
        self.assertEqual({(rev_key, parent_key): [(1, 1, 1), (3, 3, 0)]},
                         ann._matching_blocks)
        rev2_key = (b'rev2-id',)
        record = [b'0,1,1\n', b'new-line\n']
        details = ('line-delta', False)
        ann._expand_record(rev2_key, (parent_key,),
                           parent_key, record, details)
        self.assertEqual([(1, 1, 2), (3, 3, 0)],
                         ann._matching_blocks[(rev2_key, parent_key)])

    def test__get_parent_ann_uses_matching_blocks(self):
        ann = self.make_annotator()
        rev_key = (b'rev-id',)
        parent_key = (b'parent-id',)
        parent_ann = [(parent_key,)] * 3
        block_key = (rev_key, parent_key)
        ann._annotations_cache[parent_key] = parent_ann
        ann._matching_blocks[block_key] = [(0, 1, 1), (3, 3, 0)]
        # We should not try to access any parent_lines content, because we know
        # we already have the matching blocks
        par_ann, blocks = ann._get_parent_annotations_and_matches(
            rev_key, [b'1\n', b'2\n', b'3\n'], parent_key)
        self.assertEqual(parent_ann, par_ann)
        self.assertEqual([(0, 1, 1), (3, 3, 0)], blocks)
        self.assertEqual({}, ann._matching_blocks)

    def test__process_pending(self):
        ann = self.make_annotator()
        rev_key = (b'rev-id',)
        p1_key = (b'p1-id',)
        p2_key = (b'p2-id',)
        record = [b'0,1,1\n', b'new-line\n']
        details = ('line-delta', False)
        p1_record = [b'line1\n', b'line2\n']
        ann._num_compression_children[p1_key] = 1
        res = ann._expand_record(rev_key, (p1_key, p2_key), p1_key,
                                 record, details)
        self.assertEqual(None, res)
        # self.assertTrue(p1_key in ann._pending_deltas)
        self.assertEqual({}, ann._pending_annotation)
        # Now insert p1, and we should be able to expand the delta
        res = ann._expand_record(p1_key, (), None, p1_record,
                                 ('fulltext', False))
        self.assertEqual(p1_record, res)
        ann._annotations_cache[p1_key] = [(p1_key,)] * 2
        res = ann._process_pending(p1_key)
        self.assertEqual([], res)
        self.assertFalse(p1_key in ann._pending_deltas)
        self.assertTrue(p2_key in ann._pending_annotation)
        self.assertEqual({p2_key: [(rev_key, (p1_key, p2_key))]},
                         ann._pending_annotation)
        # Now fill in parent 2, and pending annotation should be satisfied
        res = ann._expand_record(p2_key, (), None, [], ('fulltext', False))
        ann._annotations_cache[p2_key] = []
        res = ann._process_pending(p2_key)
        self.assertEqual([rev_key], res)
        self.assertEqual({}, ann._pending_annotation)
        self.assertEqual({}, ann._pending_deltas)

    def test_record_delta_removes_basis(self):
        ann = self.make_annotator()
        ann._expand_record((b'parent-id',), (), None,
                           [b'line1\n', b'line2\n'], ('fulltext', False))
        ann._num_compression_children[b'parent-id'] = 2

    def test_annotate_special_text(self):
        ann = self.make_annotator()
        vf = ann._vf
        rev1_key = (b'rev-1',)
        rev2_key = (b'rev-2',)
        rev3_key = (b'rev-3',)
        spec_key = (b'special:',)
        vf.add_lines(rev1_key, [], [b'initial content\n'])
        vf.add_lines(rev2_key, [rev1_key], [b'initial content\n',
                                            b'common content\n',
                                            b'content in 2\n'])
        vf.add_lines(rev3_key, [rev1_key], [b'initial content\n',
                                            b'common content\n',
                                            b'content in 3\n'])
        spec_text = (b'initial content\n'
                     b'common content\n'
                     b'content in 2\n'
                     b'content in 3\n')
        ann.add_special_text(spec_key, [rev2_key, rev3_key], spec_text)
        anns, lines = ann.annotate(spec_key)
        self.assertEqual([(rev1_key,),
                          (rev2_key, rev3_key),
                          (rev2_key,),
                          (rev3_key,),
                          ], anns)
        self.assertEqualDiff(spec_text, b''.join(lines))


class KnitTests(TestCaseWithTransport):
    """Class containing knit test helper routines."""

    def make_test_knit(self, annotate=False, name='test'):
        mapper = ConstantMapper(name)
        return make_file_factory(annotate, mapper)(self.get_transport())


class TestBadShaError(KnitTests):
    """Tests for handling of sha errors."""

    def test_sha_exception_has_text(self):
        # having the failed text included in the error allows for recovery.
        source = self.make_test_knit()
        target = self.make_test_knit(name="target")
        if not source._max_delta_chain:
            raise TestNotApplicable(
                "cannot get delta-caused sha failures without deltas.")
        # create a basis
        basis = (b'basis',)
        broken = (b'broken',)
        source.add_lines(basis, (), [b'foo\n'])
        source.add_lines(broken, (basis,), [b'foo\n', b'bar\n'])
        # Seed target with a bad basis text
        target.add_lines(basis, (), [b'gam\n'])
        target.insert_record_stream(
            source.get_record_stream([broken], 'unordered', False))
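        # Reconstructing "broken" on top of the wrong basis yields
        # [b'gam\n', b'bar\n'] instead of [b'foo\n', b'bar\n'], so the sha1
        # check fails and the raised error carries the bogus text.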
        err = self.assertRaises(
            KnitCorrupt,
            next(target.get_record_stream([broken], 'unordered', True)
                 ).get_bytes_as, 'chunked')
        self.assertEqual([b'gam\n', b'bar\n'], err.content)
        # Test for formatting with live data
        self.assertStartsWith(str(err), "Knit ")


class TestKnitIndex(KnitTests):

    def test_add_versions_dictionary_compresses(self):
        """Adding versions to the index should update the lookup dict"""
        knit = self.make_test_knit()
        idx = knit._index
        idx.add_records([((b'a-1',), [b'fulltext'], ((b'a-1',), 0, 0), [])])
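        # Parents in the serialised index are "dictionary compressed":
        # a-2's trailing "0" and a-3's trailing "1" point at the records in
        # positions 0 (a-1) and 1 (a-2) instead of repeating the full ids.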
        self.check_file_contents('test.kndx',
                                 b'# bzr knit index 8\n'
                                 b'\n'
                                 b'a-1 fulltext 0 0  :'
                                 )
        idx.add_records([
            ((b'a-2',), [b'fulltext'], ((b'a-2',), 0, 0), [(b'a-1',)]),
            ((b'a-3',), [b'fulltext'], ((b'a-3',), 0, 0), [(b'a-2',)]),
            ])
        self.check_file_contents('test.kndx',
                                 b'# bzr knit index 8\n'
                                 b'\n'
                                 b'a-1 fulltext 0 0  :\n'
                                 b'a-2 fulltext 0 0 0 :\n'
                                 b'a-3 fulltext 0 0 1 :'
                                 )
        self.assertEqual({(b'a-3',), (b'a-1',), (b'a-2',)}, idx.keys())
        self.assertEqual({
            (b'a-1',): (((b'a-1',), 0, 0), None, (), ('fulltext', False)),
            (b'a-2',): (((b'a-2',), 0, 0), None, ((b'a-1',),), ('fulltext', False)),
            (b'a-3',): (((b'a-3',), 0, 0), None, ((b'a-2',),), ('fulltext', False)),
            }, idx.get_build_details(idx.keys()))
        self.assertEqual({(b'a-1',): (),
                          (b'a-2',): ((b'a-1',),),
                          (b'a-3',): ((b'a-2',),), },
                         idx.get_parent_map(idx.keys()))

    def test_add_versions_fails_clean(self):
        """If add_versions fails in the middle, it restores a pristine state.

        Any modifications that are made to the index are reset if all versions
        cannot be added.
        """
        # This cheats a little bit by passing in a generator which will
        # raise an exception before the processing finishes.
        # Other possibilities would be to have a version with the wrong number
        # of entries, or to make the backing transport unable to write any
        # files.

        knit = self.make_test_knit()
        idx = knit._index
        idx.add_records([((b'a-1',), [b'fulltext'], ((b'a-1',), 0, 0), [])])

        class StopEarly(Exception):
            pass

        def generate_failure():
            """Add some entries and then raise an exception"""
            yield ((b'a-2',), [b'fulltext'], (None, 0, 0), (b'a-1',))
            yield ((b'a-3',), [b'fulltext'], (None, 0, 0), (b'a-2',))
            raise StopEarly()

        # Assert the pre-condition
        def assertA1Only():
            self.assertEqual({(b'a-1',)}, set(idx.keys()))
            self.assertEqual(
                {(b'a-1',): (((b'a-1',), 0, 0), None, (), ('fulltext', False))},
                idx.get_build_details([(b'a-1',)]))
            self.assertEqual({(b'a-1',): ()}, idx.get_parent_map(idx.keys()))

        assertA1Only()
        self.assertRaises(StopEarly, idx.add_records, generate_failure())
        # And it shouldn't be modified
        assertA1Only()

    def test_knit_index_ignores_empty_files(self):
        # There was a race condition in older bzr, where a ^C at the right time
        # could leave an empty .kndx file, which bzr would later claim was a
        # corrupted file since the header was not present. In reality, the file
        # just wasn't created, so it should be ignored.
        t = transport.get_transport_from_path('.')
        t.put_bytes('test.kndx', b'')

        knit = self.make_test_knit()

    def test_knit_index_checks_header(self):
        t = transport.get_transport_from_path('.')
        t.put_bytes('test.kndx', b'# not really a knit header\n\n')
        k = self.make_test_knit()
        self.assertRaises(KnitHeaderError, k.keys)


class TestGraphIndexKnit(KnitTests):
    """Tests for knits using a GraphIndex rather than a KnitIndex."""

    def make_g_index(self, name, ref_lists=0, nodes=[]):
        builder = GraphIndexBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return GraphIndex(trans, name, size)

    def two_graph_index(self, deltas=False, catch_adds=False):
        """Build a two-graph index.

        :param deltas: If true, use underlying indices with two node-ref
            lists, with 'parent' delta-compressed against 'tail'.
        """
        # build a complex graph across several indices.
        if deltas:
            # delta compression in the index
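            # Node values are "<flags> <offset> <length>": a leading 'N'
            # marks no-eol, so b'N0 100' means offset 0, length 100, and
            # b' 100 78' means offset 100, length 78.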
            index1 = self.make_g_index('1', 2, [
                ((b'tip', ), b'N0 100', ([(b'parent', )], [], )),
                ((b'tail', ), b'', ([], []))])
            index2 = self.make_g_index('2', 2, [
                ((b'parent', ), b' 100 78',
                 ([(b'tail', ), (b'ghost', )], [(b'tail', )])),
                ((b'separate', ), b'', ([], []))])
        else:
            # just blob location and graph in the index.
            index1 = self.make_g_index('1', 1, [
                ((b'tip', ), b'N0 100', ([(b'parent', )], )),
                ((b'tail', ), b'', ([], ))])
            index2 = self.make_g_index('2', 1, [
                ((b'parent', ), b' 100 78', ([(b'tail', ), (b'ghost', )], )),
                ((b'separate', ), b'', ([], ))])
        combined_index = CombinedGraphIndex([index1, index2])
        if catch_adds:
            self.combined_index = combined_index
            self.caught_entries = []
            add_callback = self.catch_add
        else:
            add_callback = None
        return _KnitGraphIndex(combined_index, lambda: True, deltas=deltas,
                               add_callback=add_callback)

    def test_keys(self):
        index = self.two_graph_index()
        self.assertEqual({(b'tail',), (b'tip',), (b'parent',), (b'separate',)},
                         set(index.keys()))

    def test_get_position(self):
        index = self.two_graph_index()
        self.assertEqual(
            (index._graph_index._indices[0], 0, 100), index.get_position((b'tip',)))
        self.assertEqual(
            (index._graph_index._indices[1], 100, 78), index.get_position((b'parent',)))

    def test_get_method_deltas(self):
        index = self.two_graph_index(deltas=True)
        self.assertEqual('fulltext', index.get_method((b'tip',)))
        self.assertEqual('line-delta', index.get_method((b'parent',)))

    def test_get_method_no_deltas(self):
        # check that the parent-history lookup is ignored with deltas=False.
        index = self.two_graph_index(deltas=False)
        self.assertEqual('fulltext', index.get_method((b'tip',)))
        self.assertEqual('fulltext', index.get_method((b'parent',)))

    def test_get_options_deltas(self):
        index = self.two_graph_index(deltas=True)
        self.assertEqual([b'fulltext', b'no-eol'],
                         index.get_options((b'tip',)))
        self.assertEqual([b'line-delta'], index.get_options((b'parent',)))

    def test_get_options_no_deltas(self):
        # check that the parent-history lookup is ignored with deltas=False.
        index = self.two_graph_index(deltas=False)
        self.assertEqual([b'fulltext', b'no-eol'],
                         index.get_options((b'tip',)))
        self.assertEqual([b'fulltext'], index.get_options((b'parent',)))

    def test_get_parent_map(self):
        index = self.two_graph_index()
        self.assertEqual({(b'parent',): ((b'tail',), (b'ghost',))},
                         index.get_parent_map([(b'parent',), (b'ghost',)]))

    def catch_add(self, entries):
        self.caught_entries.append(entries)

    def test_add_no_callback_errors(self):
        index = self.two_graph_index()
        self.assertRaises(errors.ReadOnlyError, index.add_records,
                          [((b'new',), b'fulltext,no-eol', (None, 50, 60), [b'separate'])])

    def test_add_version_smoke(self):
        index = self.two_graph_index(catch_adds=True)
        index.add_records([((b'new',), b'fulltext,no-eol', (None, 50, 60),
                            [(b'separate',)])])
        self.assertEqual([[((b'new', ), b'N50 60', (((b'separate',),),))]],
                         self.caught_entries)

    def test_add_version_delta_not_delta_index(self):
        index = self.two_graph_index(catch_adds=True)
        self.assertRaises(KnitCorrupt, index.add_records,
                          [((b'new',), b'no-eol,line-delta', (None, 0, 100), [(b'parent',)])])
        self.assertEqual([], self.caught_entries)

    def test_add_version_same_dup(self):
        index = self.two_graph_index(catch_adds=True)
        # options can be spelt two different ways
        index.add_records(
            [((b'tip',), b'fulltext,no-eol', (None, 0, 100), [(b'parent',)])])
        index.add_records(
            [((b'tip',), b'no-eol,fulltext', (None, 0, 100), [(b'parent',)])])
        # position/length are ignored (because each pack could have fulltext
        # or delta, and be at a different position).
        index.add_records([((b'tip',), b'fulltext,no-eol', (None, 50, 100),
                            [(b'parent',)])])
        index.add_records([((b'tip',), b'fulltext,no-eol', (None, 0, 1000),
                            [(b'parent',)])])
        # but neither should have added data:
        self.assertEqual([[], [], [], []], self.caught_entries)

    def test_add_version_different_dup(self):
        index = self.two_graph_index(deltas=True, catch_adds=True)
        # change options
        self.assertRaises(KnitCorrupt, index.add_records,
                          [((b'tip',), b'line-delta', (None, 0, 100), [(b'parent',)])])
        self.assertRaises(KnitCorrupt, index.add_records,
                          [((b'tip',), b'fulltext', (None, 0, 100), [(b'parent',)])])
        # parents
        self.assertRaises(KnitCorrupt, index.add_records,
                          [((b'tip',), b'fulltext,no-eol', (None, 0, 100), [])])
        self.assertEqual([], self.caught_entries)

    def test_add_versions_nodeltas(self):
        index = self.two_graph_index(catch_adds=True)
        index.add_records([
            ((b'new',), b'fulltext,no-eol', (None, 50, 60), [(b'separate',)]),
            ((b'new2',), b'fulltext', (None, 0, 6), [(b'new',)]),
            ])
        self.assertEqual([((b'new', ), b'N50 60', (((b'separate',),),)),
                          ((b'new2', ), b' 0 6', (((b'new',),),))],
                         sorted(self.caught_entries[0]))
        self.assertEqual(1, len(self.caught_entries))

    def test_add_versions_deltas(self):
        index = self.two_graph_index(deltas=True, catch_adds=True)
        index.add_records([
            ((b'new',), b'fulltext,no-eol', (None, 50, 60), [(b'separate',)]),
            ((b'new2',), b'line-delta', (None, 0, 6), [(b'new',)]),
            ])
        self.assertEqual([((b'new', ), b'N50 60', (((b'separate',),), ())),
                          ((b'new2', ), b' 0 6', (((b'new',),), ((b'new',),), ))],
                         sorted(self.caught_entries[0]))
        self.assertEqual(1, len(self.caught_entries))

    def test_add_versions_delta_not_delta_index(self):
        index = self.two_graph_index(catch_adds=True)
        self.assertRaises(KnitCorrupt, index.add_records,
                          [((b'new',), b'no-eol,line-delta', (None, 0, 100), [(b'parent',)])])
        self.assertEqual([], self.caught_entries)

    def test_add_versions_random_id_accepted(self):
        index = self.two_graph_index(catch_adds=True)
        index.add_records([], random_id=True)

    def test_add_versions_same_dup(self):
        index = self.two_graph_index(catch_adds=True)
        # options can be spelt two different ways
        index.add_records([((b'tip',), b'fulltext,no-eol', (None, 0, 100),
                            [(b'parent',)])])
        index.add_records([((b'tip',), b'no-eol,fulltext', (None, 0, 100),
                            [(b'parent',)])])
        # position/length are ignored (because each pack could have fulltext
        # or delta, and be at a different position).
        index.add_records([((b'tip',), b'fulltext,no-eol', (None, 50, 100),
                            [(b'parent',)])])
        index.add_records([((b'tip',), b'fulltext,no-eol', (None, 0, 1000),
                            [(b'parent',)])])
        # but neither should have added data.
        self.assertEqual([[], [], [], []], self.caught_entries)

    def test_add_versions_different_dup(self):
        index = self.two_graph_index(deltas=True, catch_adds=True)
        # change options
        self.assertRaises(KnitCorrupt, index.add_records,
                          [((b'tip',), b'line-delta', (None, 0, 100), [(b'parent',)])])
        self.assertRaises(KnitCorrupt, index.add_records,
                          [((b'tip',), b'fulltext', (None, 0, 100), [(b'parent',)])])
        # parents
        self.assertRaises(KnitCorrupt, index.add_records,
                          [((b'tip',), b'fulltext,no-eol', (None, 0, 100), [])])
        # change options in the second record
        self.assertRaises(KnitCorrupt, index.add_records,
                          [((b'tip',), b'fulltext,no-eol', (None, 0, 100), [(b'parent',)]),
                           ((b'tip',), b'line-delta', (None, 0, 100), [(b'parent',)])])
        self.assertEqual([], self.caught_entries)

    def make_g_index_missing_compression_parent(self):
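        # With two reference lists, the first holds the full parents and the
        # second the compression (delta) basis: 'tip' claims a delta basis
        # of 'missing-parent', which no index in the set provides.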
        graph_index = self.make_g_index('missing_comp', 2,
                                        [((b'tip', ), b' 100 78',
                                          ([(b'missing-parent', ), (b'ghost', )], [(b'missing-parent', )]))])
        return graph_index

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 2,
                                        [((b'parent', ), b' 100 78', ([], [])),
                                         ((b'tip', ), b' 100 78',
                                            ([(b'parent', ), (b'missing-parent', )], [(b'parent', )])),
                                         ])
        return graph_index

    def make_g_index_no_external_refs(self):
        graph_index = self.make_g_index('no_external_refs', 2,
                                        [((b'rev', ), b' 100 78',
                                          ([(b'parent', ), (b'ghost', )], []))])
        return graph_index

    def test_add_good_unvalidated_index(self):
        unvalidated = self.make_g_index_no_external_refs()
        combined = CombinedGraphIndex([unvalidated])
        index = _KnitGraphIndex(combined, lambda: True, deltas=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(frozenset(), index.get_missing_compression_parents())

    def test_add_missing_compression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_compression_parent()
        combined = CombinedGraphIndex([unvalidated])
        index = _KnitGraphIndex(combined, lambda: True, deltas=True)
        index.scan_unvalidated_index(unvalidated)
        # This also checks that it's only the compression parent that is
        # examined, otherwise 'ghost' would also be reported as a missing
        # parent.
        self.assertEqual(
            frozenset([(b'missing-parent',)]),
            index.get_missing_compression_parents())

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = CombinedGraphIndex([unvalidated])
        index = _KnitGraphIndex(combined, lambda: True, deltas=True,
                                track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([(b'missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 2, [])
        combined = CombinedGraphIndex([g_index])
        index = _KnitGraphIndex(combined, lambda: True, deltas=True,
                                add_callback=self.catch_add,
                                track_external_parent_refs=True)
        self.caught_entries = []
        index.add_records([
            ((b'new-key',), b'fulltext,no-eol', (None, 50, 60),
             [(b'parent-1',), (b'parent-2',)])])
        self.assertEqual(
            frozenset([(b'parent-1',), (b'parent-2',)]),
            index.get_missing_parents())

    def test_add_unvalidated_index_with_present_external_references(self):
        index = self.two_graph_index(deltas=True)
        # Ugly hack to get at one of the underlying GraphIndex objects that
        # two_graph_index built.
        unvalidated = index._graph_index._indices[1]
        # 'parent' is an external ref of _indices[1] (unvalidated), but is
        # present in _indices[0].
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(frozenset(), index.get_missing_compression_parents())

    def make_new_missing_parent_g_index(self, name):
        missing_parent = name.encode('ascii') + b'-missing-parent'
        graph_index = self.make_g_index(name, 2,
                                        [((name.encode('ascii') + b'tip', ), b' 100 78',
                                          ([(missing_parent, ), (b'ghost', )], [(missing_parent, )]))])
        return graph_index

    def test_add_mulitiple_unvalidated_indices_with_missing_parents(self):
        g_index_1 = self.make_new_missing_parent_g_index('one')
        g_index_2 = self.make_new_missing_parent_g_index('two')
        combined = CombinedGraphIndex([g_index_1, g_index_2])
        index = _KnitGraphIndex(combined, lambda: True, deltas=True)
        index.scan_unvalidated_index(g_index_1)
        index.scan_unvalidated_index(g_index_2)
        self.assertEqual(
            frozenset([(b'one-missing-parent',), (b'two-missing-parent',)]),
            index.get_missing_compression_parents())

    def test_add_mulitiple_unvalidated_indices_with_mutual_dependencies(self):
        graph_index_a = self.make_g_index('one', 2,
                                          [((b'parent-one', ), b' 100 78', ([(b'non-compression-parent',)], [])),
                                           ((b'child-of-two', ), b' 100 78',
                                              ([(b'parent-two',)], [(b'parent-two',)]))])
        graph_index_b = self.make_g_index('two', 2,
                                          [((b'parent-two', ), b' 100 78', ([(b'non-compression-parent',)], [])),
                                           ((b'child-of-one', ), b' 100 78',
                                              ([(b'parent-one',)], [(b'parent-one',)]))])
        combined = CombinedGraphIndex([graph_index_a, graph_index_b])
        index = _KnitGraphIndex(combined, lambda: True, deltas=True)
        index.scan_unvalidated_index(graph_index_a)
        index.scan_unvalidated_index(graph_index_b)
        self.assertEqual(
            frozenset([]), index.get_missing_compression_parents())


class TestNoParentsGraphIndexKnit(KnitTests):
    """Tests for knits using _KnitGraphIndex with no parents."""

    def make_g_index(self, name, ref_lists=0, nodes=[]):
        builder = GraphIndexBuilder(ref_lists)
        for node, references in nodes:
            builder.add_node(node, references)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return GraphIndex(trans, name, size)

    def test_add_good_unvalidated_index(self):
        unvalidated = self.make_g_index('unvalidated')
        combined = CombinedGraphIndex([unvalidated])
        index = _KnitGraphIndex(combined, lambda: True, parents=False)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(frozenset(),
                         index.get_missing_compression_parents())

    def test_parents_deltas_incompatible(self):
        index = CombinedGraphIndex([])
        self.assertRaises(knit.KnitError, _KnitGraphIndex, lambda: True,
                          index, deltas=True, parents=False)

    def two_graph_index(self, catch_adds=False):
        """Build a two-graph index.

        :param catch_adds: If true, capture calls to add_records in
            self.caught_entries via the catch_add callback.
        """
        # put several versions in the index.
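        # With parents=False the index stores no reference lists at all, so
        # each node below is just a (key, value) pair.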
        index1 = self.make_g_index('1', 0, [
            ((b'tip', ), b'N0 100'),
            ((b'tail', ), b'')])
        index2 = self.make_g_index('2', 0, [
            ((b'parent', ), b' 100 78'),
            ((b'separate', ), b'')])
        combined_index = CombinedGraphIndex([index1, index2])
        if catch_adds:
            self.combined_index = combined_index
            self.caught_entries = []
            add_callback = self.catch_add
        else:
            add_callback = None
        return _KnitGraphIndex(combined_index, lambda: True, parents=False,
                               add_callback=add_callback)

    def test_keys(self):
        index = self.two_graph_index()
        self.assertEqual({(b'tail',), (b'tip',), (b'parent',), (b'separate',)},
                         set(index.keys()))

    def test_get_position(self):
        index = self.two_graph_index()
        self.assertEqual((index._graph_index._indices[0], 0, 100),
                         index.get_position((b'tip',)))
        self.assertEqual((index._graph_index._indices[1], 100, 78),
                         index.get_position((b'parent',)))

    def test_get_method(self):
        index = self.two_graph_index()
        self.assertEqual('fulltext', index.get_method((b'tip',)))
        self.assertEqual([b'fulltext'], index.get_options((b'parent',)))

    def test_get_options(self):
        index = self.two_graph_index()
        self.assertEqual([b'fulltext', b'no-eol'],
                         index.get_options((b'tip',)))
        self.assertEqual([b'fulltext'], index.get_options((b'parent',)))

    def test_get_parent_map(self):
        index = self.two_graph_index()
        self.assertEqual({(b'parent',): None},
                         index.get_parent_map([(b'parent',), (b'ghost',)]))

    def catch_add(self, entries):
        self.caught_entries.append(entries)

    def test_add_no_callback_errors(self):
        index = self.two_graph_index()
        self.assertRaises(errors.ReadOnlyError, index.add_records,
                          [((b'new',), b'fulltext,no-eol', (None, 50, 60), [(b'separate',)])])

    def test_add_version_smoke(self):
        index = self.two_graph_index(catch_adds=True)
        index.add_records(
            [((b'new',), b'fulltext,no-eol', (None, 50, 60), [])])
        self.assertEqual([[((b'new', ), b'N50 60')]],
                         self.caught_entries)

    def test_add_version_delta_not_delta_index(self):
        index = self.two_graph_index(catch_adds=True)
        self.assertRaises(KnitCorrupt, index.add_records,
                          [((b'new',), b'no-eol,line-delta', (None, 0, 100), [])])
        self.assertEqual([], self.caught_entries)

    def test_add_version_same_dup(self):
        index = self.two_graph_index(catch_adds=True)
        # options can be spelt two different ways
        index.add_records(
            [((b'tip',), b'fulltext,no-eol', (None, 0, 100), [])])
        index.add_records(
            [((b'tip',), b'no-eol,fulltext', (None, 0, 100), [])])
        # position/length are ignored (because each pack could have fulltext
        # or delta, and be at a different position).
        index.add_records(
            [((b'tip',), b'fulltext,no-eol', (None, 50, 100), [])])
        index.add_records(
            [((b'tip',), b'fulltext,no-eol', (None, 0, 1000), [])])
        # but neither should have added data.
        self.assertEqual([[], [], [], []], self.caught_entries)

    def test_add_version_different_dup(self):
        index = self.two_graph_index(catch_adds=True)
        # change options
        self.assertRaises(KnitCorrupt, index.add_records,
                          [((b'tip',), b'no-eol,line-delta', (None, 0, 100), [])])
        self.assertRaises(KnitCorrupt, index.add_records,
                          [((b'tip',), b'line-delta,no-eol', (None, 0, 100), [])])
        self.assertRaises(KnitCorrupt, index.add_records,
                          [((b'tip',), b'fulltext', (None, 0, 100), [])])
        # parents
        self.assertRaises(KnitCorrupt, index.add_records,
                          [((b'tip',), b'fulltext,no-eol', (None, 0, 100), [(b'parent',)])])
        self.assertEqual([], self.caught_entries)

    def test_add_versions(self):
        index = self.two_graph_index(catch_adds=True)
        index.add_records([
            ((b'new',), b'fulltext,no-eol', (None, 50, 60), []),
            ((b'new2',), b'fulltext', (None, 0, 6), []),
            ])
        self.assertEqual([((b'new', ), b'N50 60'), ((b'new2', ), b' 0 6')],
                         sorted(self.caught_entries[0]))
        self.assertEqual(1, len(self.caught_entries))

    def test_add_versions_delta_not_delta_index(self):
        index = self.two_graph_index(catch_adds=True)
        self.assertRaises(KnitCorrupt, index.add_records,
                          [((b'new',), b'no-eol,line-delta', (None, 0, 100), [(b'parent',)])])
        self.assertEqual([], self.caught_entries)

    def test_add_versions_parents_not_parents_index(self):
        index = self.two_graph_index(catch_adds=True)
        self.assertRaises(KnitCorrupt, index.add_records,
                          [((b'new',), b'no-eol,fulltext', (None, 0, 100), [(b'parent',)])])
        self.assertEqual([], self.caught_entries)

    def test_add_versions_random_id_accepted(self):
        index = self.two_graph_index(catch_adds=True)
        index.add_records([], random_id=True)

    def test_add_versions_same_dup(self):
        index = self.two_graph_index(catch_adds=True)
        # options can be spelt two different ways
        index.add_records(
            [((b'tip',), b'fulltext,no-eol', (None, 0, 100), [])])
        index.add_records(
            [((b'tip',), b'no-eol,fulltext', (None, 0, 100), [])])
        # position/length are ignored (because each pack could have fulltext
        # or delta, and be at a different position).
        index.add_records(
            [((b'tip',), b'fulltext,no-eol', (None, 50, 100), [])])
        index.add_records(
            [((b'tip',), b'fulltext,no-eol', (None, 0, 1000), [])])
        # but neither should have added data.
        self.assertEqual([[], [], [], []], self.caught_entries)

    def test_add_versions_different_dup(self):
        index = self.two_graph_index(catch_adds=True)
        # change options
        self.assertRaises(KnitCorrupt, index.add_records,
                          [((b'tip',), b'no-eol,line-delta', (None, 0, 100), [])])
        self.assertRaises(KnitCorrupt, index.add_records,
                          [((b'tip',), b'line-delta,no-eol', (None, 0, 100), [])])
        self.assertRaises(KnitCorrupt, index.add_records,
                          [((b'tip',), b'fulltext', (None, 0, 100), [])])
        # parents
        self.assertRaises(KnitCorrupt, index.add_records,
                          [((b'tip',), b'fulltext,no-eol', (None, 0, 100), [(b'parent',)])])
        # change options in the second record
        self.assertRaises(KnitCorrupt, index.add_records,
                          [((b'tip',), b'fulltext,no-eol', (None, 0, 100), []),
                           ((b'tip',), b'no-eol,line-delta', (None, 0, 100), [])])
        self.assertEqual([], self.caught_entries)


class TestKnitVersionedFiles(KnitTests):

    def assertGroupKeysForIo(self, exp_groups, keys, non_local_keys,
                             positions, _min_buffer_size=None):
        kvf = self.make_test_knit()
        if _min_buffer_size is None:
            _min_buffer_size = knit._STREAM_MIN_BUFFER_SIZE
        self.assertEqual(exp_groups, kvf._group_keys_for_io(
            keys, non_local_keys, positions,
            _min_buffer_size=_min_buffer_size))

    def assertSplitByPrefix(self, expected_map, expected_prefix_order,
                            keys):
        split, prefix_order = KnitVersionedFiles._split_by_prefix(keys)
        self.assertEqual(expected_map, split)
        self.assertEqual(expected_prefix_order, prefix_order)

    def test__group_keys_for_io(self):
        ft_detail = ('fulltext', False)
        ld_detail = ('line-delta', False)
        f_a = (b'f', b'a')
        f_b = (b'f', b'b')
        f_c = (b'f', b'c')
        g_a = (b'g', b'a')
        g_b = (b'g', b'b')
        g_c = (b'g', b'c')
        positions = {
            f_a: (ft_detail, (f_a, 0, 100), None),
            f_b: (ld_detail, (f_b, 100, 21), f_a),
            f_c: (ld_detail, (f_c, 180, 15), f_b),
            g_a: (ft_detail, (g_a, 121, 35), None),
            g_b: (ld_detail, (g_b, 156, 12), g_a),
            g_c: (ld_detail, (g_c, 195, 13), g_a),
            }
        self.assertGroupKeysForIo([([f_a], set())],
                                  [f_a], [], positions)
        self.assertGroupKeysForIo([([f_a], {f_a})],
                                  [f_a], [f_a], positions)
        self.assertGroupKeysForIo([([f_a, f_b], set([]))],
                                  [f_a, f_b], [], positions)
        self.assertGroupKeysForIo([([f_a, f_b], {f_b})],
                                  [f_a, f_b], [f_b], positions)
        self.assertGroupKeysForIo([([f_a, f_b, g_a, g_b], set())],
                                  [f_a, g_a, f_b, g_b], [], positions)
        self.assertGroupKeysForIo([([f_a, f_b, g_a, g_b], set())],
                                  [f_a, g_a, f_b, g_b], [], positions,
                                  _min_buffer_size=150)
        self.assertGroupKeysForIo([([f_a, f_b], set()), ([g_a, g_b], set())],
                                  [f_a, g_a, f_b, g_b], [], positions,
                                  _min_buffer_size=100)
        self.assertGroupKeysForIo([([f_c], set()), ([g_b], set())],
                                  [f_c, g_b], [], positions,
                                  _min_buffer_size=125)
        self.assertGroupKeysForIo([([g_b, f_c], set())],
                                  [g_b, f_c], [], positions,
                                  _min_buffer_size=125)

    def test__split_by_prefix(self):
        self.assertSplitByPrefix({b'f': [(b'f', b'a'), (b'f', b'b')],
                                  b'g': [(b'g', b'b'), (b'g', b'a')],
                                  }, [b'f', b'g'],
                                 [(b'f', b'a'), (b'g', b'b'),
                                  (b'g', b'a'), (b'f', b'b')])

        self.assertSplitByPrefix({b'f': [(b'f', b'a'), (b'f', b'b')],
                                  b'g': [(b'g', b'b'), (b'g', b'a')],
                                  }, [b'f', b'g'],
                                 [(b'f', b'a'), (b'f', b'b'),
                                  (b'g', b'b'), (b'g', b'a')])

        self.assertSplitByPrefix({b'f': [(b'f', b'a'), (b'f', b'b')],
                                  b'g': [(b'g', b'b'), (b'g', b'a')],
                                  }, [b'f', b'g'],
                                 [(b'f', b'a'), (b'f', b'b'),
                                  (b'g', b'b'), (b'g', b'a')])

        self.assertSplitByPrefix({b'f': [(b'f', b'a'), (b'f', b'b')],
                                  b'g': [(b'g', b'b'), (b'g', b'a')],
                                  b'': [(b'a',), (b'b',)]
                                  }, [b'f', b'g', b''],
                                 [(b'f', b'a'), (b'g', b'b'),
                                  (b'a',), (b'b',),
                                  (b'g', b'a'), (b'f', b'b')])


class TestStacking(KnitTests):

    def get_basis_and_test_knit(self):
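        # "test" stacks on "basis"; wrapping basis in a
        # RecordingVersionedFilesDecorator lets each test assert exactly
        # which calls fall through to the fallback.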
        basis = self.make_test_knit(name='basis')
 
2202
        basis = RecordingVersionedFilesDecorator(basis)
 
2203
        test = self.make_test_knit(name='test')
 
2204
        test.add_fallback_versioned_files(basis)
 
2205
        return basis, test
 
2206
 
 
2207
    def test_add_fallback_versioned_files(self):
 
2208
        basis = self.make_test_knit(name='basis')
 
2209
        test = self.make_test_knit(name='test')
 
2210
        # It must not error; other tests test that the fallback is referred to
 
2211
        # when accessing data.
 
2212
        test.add_fallback_versioned_files(basis)
 
2213
 
 
2214
    def test_add_lines(self):
 
2215
        # lines added to the test are not added to the basis
 
2216
        basis, test = self.get_basis_and_test_knit()
 
2217
        key = (b'foo',)
 
2218
        key_basis = (b'bar',)
 
2219
        key_cross_border = (b'quux',)
 
2220
        key_delta = (b'zaphod',)
 
2221
        test.add_lines(key, (), [b'foo\n'])
 
2222
        self.assertEqual({}, basis.get_parent_map([key]))
 
2223
        # lines added to the test that reference across the stack do a
 
2224
        # fulltext.
 
2225
        basis.add_lines(key_basis, (), [b'foo\n'])
 
2226
        basis.calls = []
 
2227
        test.add_lines(key_cross_border, (key_basis,), [b'foo\n'])
 
2228
        self.assertEqual('fulltext', test._index.get_method(key_cross_border))
 
2229
        # we don't even need to look at the basis to see that this should be
 
2230
        # stored as a fulltext
 
2231
        self.assertEqual([], basis.calls)
 
2232
        # Subsequent adds do delta.
 
2233
        basis.calls = []
 
2234
        test.add_lines(key_delta, (key_cross_border,), [b'foo\n'])
 
2235
        self.assertEqual('line-delta', test._index.get_method(key_delta))
 
2236
        self.assertEqual([], basis.calls)
 
    def test_annotate(self):
        # annotations from the test knit are answered without asking the basis
        basis, test = self.get_basis_and_test_knit()
        key = (b'foo',)
        key_basis = (b'bar',)
        test.add_lines(key, (), [b'foo\n'])
        details = test.annotate(key)
        self.assertEqual([(key, b'foo\n')], details)
        self.assertEqual([], basis.calls)
        # But texts that are not in the test knit are looked for in the basis
        # directly.
        basis.add_lines(key_basis, (), [b'foo\n', b'bar\n'])
        basis.calls = []
        details = test.annotate(key_basis)
        self.assertEqual(
            [(key_basis, b'foo\n'), (key_basis, b'bar\n')], details)
        # Not optimised to date:
        # self.assertEqual([("annotate", key_basis)], basis.calls)
        self.assertEqual([('get_parent_map', {key_basis}),
                          ('get_parent_map', {key_basis}),
                          ('get_record_stream', [key_basis], 'topological', True)],
                         basis.calls)
 
    def test_check(self):
        # At the moment checking a stacked knit does implicitly check the
        # fallback files.
        basis, test = self.get_basis_and_test_knit()
        test.check()

    def test_get_parent_map(self):
        # parents in the test knit are answered without asking the basis
        basis, test = self.get_basis_and_test_knit()
        key = (b'foo',)
        key_basis = (b'bar',)
        key_missing = (b'missing',)
        test.add_lines(key, (), [])
        parent_map = test.get_parent_map([key])
        self.assertEqual({key: ()}, parent_map)
        self.assertEqual([], basis.calls)
        # But parents that are not in the test knit are looked for in the
        # basis.
        basis.add_lines(key_basis, (), [])
        basis.calls = []
        parent_map = test.get_parent_map([key, key_basis, key_missing])
        self.assertEqual({key: (),
                          key_basis: ()}, parent_map)
        self.assertEqual([("get_parent_map", {key_basis, key_missing})],
                         basis.calls)
 
    def test_get_record_stream_unordered_fulltexts(self):
        # records from the test knit are answered without asking the basis:
        basis, test = self.get_basis_and_test_knit()
        key = (b'foo',)
        key_basis = (b'bar',)
        key_missing = (b'missing',)
        test.add_lines(key, (), [b'foo\n'])
        records = list(test.get_record_stream([key], 'unordered', True))
        self.assertEqual(1, len(records))
        self.assertEqual([], basis.calls)
        # Missing (from test knit) objects are retrieved from the basis:
        basis.add_lines(key_basis, (), [b'foo\n', b'bar\n'])
        basis.calls = []
        records = list(test.get_record_stream([key_basis, key_missing],
                                              'unordered', True))
        self.assertEqual(2, len(records))
        calls = list(basis.calls)
        for record in records:
            self.assertSubset([record.key], (key_basis, key_missing))
            if record.key == key_missing:
                self.assertIsInstance(record, AbsentContentFactory)
            else:
                reference = list(basis.get_record_stream([key_basis],
                                                         'unordered', True))[0]
                self.assertEqual(reference.key, record.key)
                self.assertEqual(reference.sha1, record.sha1)
                self.assertEqual(reference.storage_kind, record.storage_kind)
                self.assertEqual(reference.get_bytes_as(reference.storage_kind),
                                 record.get_bytes_as(record.storage_kind))
                self.assertEqual(reference.get_bytes_as('fulltext'),
                                 record.get_bytes_as('fulltext'))
        # It's not strictly minimal, but it seems reasonable for now for it to
        # ask which fallbacks have which parents.
        self.assertEqual([
            ("get_parent_map", {key_basis, key_missing}),
            ("get_record_stream", [key_basis], 'unordered', True)],
            calls)
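        # The recorded calls show the fallback protocol: first a
        # get_parent_map to learn which of the requested keys the basis
        # actually has, then a record stream for just those; keys absent
        # everywhere come back as AbsentContentFactory rather than raising.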
 
    def test_get_record_stream_ordered_fulltexts(self):
        # ordering is preserved down into the fallback store.
        basis, test = self.get_basis_and_test_knit()
        key = (b'foo',)
        key_basis = (b'bar',)
        key_basis_2 = (b'quux',)
        key_missing = (b'missing',)
        test.add_lines(key, (key_basis,), [b'foo\n'])
        # Missing (from test knit) objects are retrieved from the basis:
        basis.add_lines(key_basis, (key_basis_2,), [b'foo\n', b'bar\n'])
        basis.add_lines(key_basis_2, (), [b'quux\n'])
        basis.calls = []
        # ask for the keys in non-topological order
        records = list(test.get_record_stream(
            [key, key_basis, key_missing, key_basis_2], 'topological', True))
        self.assertEqual(4, len(records))
        results = []
        for record in records:
            self.assertSubset([record.key],
                              (key_basis, key_missing, key_basis_2, key))
            if record.key == key_missing:
                self.assertIsInstance(record, AbsentContentFactory)
            else:
                results.append((record.key, record.sha1, record.storage_kind,
                                record.get_bytes_as('fulltext')))
        calls = list(basis.calls)
        order = [record[0] for record in results]
        self.assertEqual([key_basis_2, key_basis, key], order)
        for result in results:
            if result[0] == key:
                source = test
            else:
                source = basis
            record = next(source.get_record_stream([result[0]], 'unordered',
                                                   True))
            self.assertEqual(record.key, result[0])
            self.assertEqual(record.sha1, result[1])
            # We used to check that the storage kind matched, but actually it
            # depends on whether it was sourced from the basis, or in a single
            # group, because asking for full texts returns proxy objects to a
            # _ContentMapGenerator object; so checking the kind is unneeded.
            self.assertEqual(record.get_bytes_as('fulltext'), result[3])
        # It's not strictly minimal, but it seems reasonable for now for it to
        # ask which fallbacks have which parents.
        self.assertEqual(2, len(calls))
        self.assertEqual(
            ("get_parent_map", {key_basis, key_basis_2, key_missing}),
            calls[0])
        # topological is requested from the fallback, because that is what
        # was requested at the top level.
        self.assertIn(
            calls[1], [
                ("get_record_stream", [key_basis_2,
                                       key_basis], 'topological', True),
                ("get_record_stream", [key_basis, key_basis_2], 'topological', True)])
 
    def test_get_record_stream_unordered_deltas(self):
        # records from the test knit are answered without asking the basis:
        basis, test = self.get_basis_and_test_knit()
        key = (b'foo',)
        key_basis = (b'bar',)
        key_missing = (b'missing',)
        test.add_lines(key, (), [b'foo\n'])
        records = list(test.get_record_stream([key], 'unordered', False))
        self.assertEqual(1, len(records))
        self.assertEqual([], basis.calls)
        # Missing (from test knit) objects are retrieved from the basis:
        basis.add_lines(key_basis, (), [b'foo\n', b'bar\n'])
        basis.calls = []
        records = list(test.get_record_stream([key_basis, key_missing],
                                              'unordered', False))
        self.assertEqual(2, len(records))
        calls = list(basis.calls)
        for record in records:
            self.assertSubset([record.key], (key_basis, key_missing))
            if record.key == key_missing:
                self.assertIsInstance(record, AbsentContentFactory)
            else:
                reference = list(basis.get_record_stream([key_basis],
                                                         'unordered', False))[0]
                self.assertEqual(reference.key, record.key)
                self.assertEqual(reference.sha1, record.sha1)
                self.assertEqual(reference.storage_kind, record.storage_kind)
                self.assertEqual(reference.get_bytes_as(reference.storage_kind),
                                 record.get_bytes_as(record.storage_kind))
        # It's not strictly minimal, but it seems reasonable for now for it to
        # ask which fallbacks have which parents.
        self.assertEqual([
            ("get_parent_map", {key_basis, key_missing}),
            ("get_record_stream", [key_basis], 'unordered', False)],
            calls)
 
    def test_get_record_stream_ordered_deltas(self):
        # ordering is preserved down into the fallback store.
        basis, test = self.get_basis_and_test_knit()
        key = (b'foo',)
        key_basis = (b'bar',)
        key_basis_2 = (b'quux',)
        key_missing = (b'missing',)
        test.add_lines(key, (key_basis,), [b'foo\n'])
        # Missing (from test knit) objects are retrieved from the basis:
        basis.add_lines(key_basis, (key_basis_2,), [b'foo\n', b'bar\n'])
        basis.add_lines(key_basis_2, (), [b'quux\n'])
        basis.calls = []
        # ask for the keys in non-topological order
        records = list(test.get_record_stream(
            [key, key_basis, key_missing, key_basis_2], 'topological', False))
        self.assertEqual(4, len(records))
        results = []
        for record in records:
            self.assertSubset([record.key],
                              (key_basis, key_missing, key_basis_2, key))
            if record.key == key_missing:
                self.assertIsInstance(record, AbsentContentFactory)
            else:
                results.append((record.key, record.sha1, record.storage_kind,
                                record.get_bytes_as(record.storage_kind)))
        calls = list(basis.calls)
        order = [record[0] for record in results]
        self.assertEqual([key_basis_2, key_basis, key], order)
        for result in results:
            if result[0] == key:
                source = test
            else:
                source = basis
            record = next(source.get_record_stream([result[0]], 'unordered',
                                                   False))
            self.assertEqual(record.key, result[0])
            self.assertEqual(record.sha1, result[1])
            self.assertEqual(record.storage_kind, result[2])
            self.assertEqual(record.get_bytes_as(
                record.storage_kind), result[3])
        # It's not strictly minimal, but it seems reasonable for now for it to
        # ask which fallbacks have which parents.
        self.assertEqual([
            ("get_parent_map", {key_basis, key_basis_2, key_missing}),
            ("get_record_stream", [key_basis_2, key_basis], 'topological', False)],
            calls)
 
    def test_get_sha1s(self):
        # SHA1 sums for texts in the test knit are answered without asking
        # the basis.
        basis, test = self.get_basis_and_test_knit()
        key = (b'foo',)
        key_basis = (b'bar',)
        key_missing = (b'missing',)
        test.add_lines(key, (), [b'foo\n'])
        key_sha1sum = osutils.sha_string(b'foo\n')
        sha1s = test.get_sha1s([key])
        self.assertEqual({key: key_sha1sum}, sha1s)
        self.assertEqual([], basis.calls)
        # But texts that are not in the test knit are looked for in the basis
        # directly (rather than via text reconstruction) so that remote
        # servers etc. don't have to answer with full content.
        basis.add_lines(key_basis, (), [b'foo\n', b'bar\n'])
        basis_sha1sum = osutils.sha_string(b'foo\nbar\n')
        basis.calls = []
        sha1s = test.get_sha1s([key, key_missing, key_basis])
        self.assertEqual({key: key_sha1sum,
                          key_basis: basis_sha1sum}, sha1s)
        self.assertEqual([("get_sha1s", {key_basis, key_missing})],
                         basis.calls)
 
    def test_insert_record_stream(self):
        # records are inserted as normal; insert_record_stream builds on
        # add_lines, so a smoke test should be all that's needed:
        key_basis = (b'bar',)
        key_delta = (b'zaphod',)
        basis, test = self.get_basis_and_test_knit()
        source = self.make_test_knit(name='source')
        basis.add_lines(key_basis, (), [b'foo\n'])
        basis.calls = []
        source.add_lines(key_basis, (), [b'foo\n'])
        source.add_lines(key_delta, (key_basis,), [b'bar\n'])
        stream = source.get_record_stream([key_delta], 'unordered', False)
        test.insert_record_stream(stream)
        # XXX: this does somewhat too many calls in making sure of whether it
        # has to recreate the full text.
        self.assertEqual([("get_parent_map", {key_basis}),
                          ('get_parent_map', {key_basis}),
                          ('get_record_stream', [key_basis], 'unordered', True)],
                         basis.calls)
        self.assertEqual({key_delta: (key_basis,)},
                         test.get_parent_map([key_delta]))
        self.assertEqual(b'bar\n', next(test.get_record_stream([key_delta],
                                                               'unordered', True)).get_bytes_as('fulltext'))
 
    def test_iter_lines_added_or_present_in_keys(self):
        # Lines from the basis are returned, and lines for a given key are
        # only returned once.
        key1 = (b'foo1',)
        key2 = (b'foo2',)
        # all sources are asked for keys:
        basis, test = self.get_basis_and_test_knit()
        basis.add_lines(key1, (), [b"foo"])
        basis.calls = []
        lines = list(test.iter_lines_added_or_present_in_keys([key1]))
        self.assertEqual([(b"foo\n", key1)], lines)
        self.assertEqual([("iter_lines_added_or_present_in_keys", {key1})],
                         basis.calls)
        # keys in both are not duplicated:
        test.add_lines(key2, (), [b"bar\n"])
        basis.add_lines(key2, (), [b"bar\n"])
        basis.calls = []
        lines = list(test.iter_lines_added_or_present_in_keys([key2]))
        self.assertEqual([(b"bar\n", key2)], lines)
        self.assertEqual([], basis.calls)
 
    def test_keys(self):
        key1 = (b'foo1',)
        key2 = (b'foo2',)
        # all sources are asked for keys:
        basis, test = self.get_basis_and_test_knit()
        keys = test.keys()
        self.assertEqual(set(), set(keys))
        self.assertEqual([("keys",)], basis.calls)
        # keys from a basis are returned:
        basis.add_lines(key1, (), [])
        basis.calls = []
        keys = test.keys()
        self.assertEqual({key1}, set(keys))
        self.assertEqual([("keys",)], basis.calls)
        # keys in both are not duplicated:
        test.add_lines(key2, (), [])
        basis.add_lines(key2, (), [])
        basis.calls = []
        keys = test.keys()
        self.assertEqual(2, len(keys))
        self.assertEqual({key1, key2}, set(keys))
        self.assertEqual([("keys",)], basis.calls)
 
    def test_add_mpdiffs(self):
        # records are inserted as normal; add_mpdiffs builds on
        # add_lines, so a smoke test should be all that's needed:
        key_basis = (b'bar',)
        key_delta = (b'zaphod',)
        basis, test = self.get_basis_and_test_knit()
        source = self.make_test_knit(name='source')
        basis.add_lines(key_basis, (), [b'foo\n'])
        basis.calls = []
        source.add_lines(key_basis, (), [b'foo\n'])
        source.add_lines(key_delta, (key_basis,), [b'bar\n'])
        diffs = source.make_mpdiffs([key_delta])
        test.add_mpdiffs([(key_delta, (key_basis,),
                           source.get_sha1s([key_delta])[key_delta], diffs[0])])
        self.assertEqual([("get_parent_map", {key_basis}),
                          ('get_record_stream', [key_basis], 'unordered', True), ],
                         basis.calls)
        self.assertEqual({key_delta: (key_basis,)},
                         test.get_parent_map([key_delta]))
        self.assertEqual(b'bar\n', next(test.get_record_stream([key_delta],
                                                               'unordered', True)).get_bytes_as('fulltext'))
 
    def test_make_mpdiffs(self):
        # Generating an mpdiff across a stacking boundary should detect
        # parent text regions.
        key = (b'foo',)
        key_left = (b'bar',)
        key_right = (b'zaphod',)
        basis, test = self.get_basis_and_test_knit()
        basis.add_lines(key_left, (), [b'bar\n'])
        basis.add_lines(key_right, (), [b'zaphod\n'])
        basis.calls = []
        test.add_lines(key, (key_left, key_right),
                       [b'bar\n', b'foo\n', b'zaphod\n'])
        diffs = test.make_mpdiffs([key])
        self.assertEqual([
            multiparent.MultiParent([multiparent.ParentText(0, 0, 0, 1),
                                     multiparent.NewText([b'foo\n']),
                                     multiparent.ParentText(1, 0, 2, 1)])],
                         diffs)
        self.assertEqual(3, len(basis.calls))
        self.assertEqual([
            ("get_parent_map", {key_left, key_right}),
            ("get_parent_map", {key_left, key_right}),
            ],
            basis.calls[:-1])
        last_call = basis.calls[-1]
        self.assertEqual('get_record_stream', last_call[0])
        self.assertEqual({key_left, key_right}, set(last_call[1]))
        self.assertEqual('topological', last_call[2])
        self.assertEqual(True, last_call[3])
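        # Reading the expected diff: ParentText(0, 0, 0, 1) copies one line
        # from parent 0 (key_left, b'bar\n') to output line 0,
        # NewText([b'foo\n']) contributes the genuinely new middle line, and
        # ParentText(1, 0, 2, 1) copies one line from parent 1 (key_right,
        # b'zaphod\n') to output line 2, reconstructing the three-line text.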
 

class TestNetworkBehaviour(KnitTests):
    """Tests for getting data out of/into knits over the network."""

    def test_include_delta_closure_generates_a_knit_delta_closure(self):
        vf = self.make_test_knit(name='test')
        # put in three texts, giving ft, delta, delta
        vf.add_lines((b'base',), (), [b'base\n', b'content\n'])
        vf.add_lines((b'd1',), ((b'base',),), [b'd1\n'])
        vf.add_lines((b'd2',), ((b'd1',),), [b'd2\n'])
        # But heuristics could interfere, so check what happened:
        self.assertEqual(['knit-ft-gz', 'knit-delta-gz', 'knit-delta-gz'],
                         [record.storage_kind for record in
                          vf.get_record_stream([(b'base',), (b'd1',), (b'd2',)],
                                               'topological', False)])
        # Generate a stream of just the deltas with include_delta_closure=True,
        # serialise to the network, and check that we get a delta closure on
        # the wire.
        stream = vf.get_record_stream(
            [(b'd1',), (b'd2',)], 'topological', True)
        netb = [record.get_bytes_as(record.storage_kind) for record in stream]
        # The first bytes should be a memo from _ContentMapGenerator, and the
        # second bytes should be empty (because it's an API proxy, not
        # something for wire serialisation).
        self.assertEqual(b'', netb[1])
        bytes = netb[0]
        kind, line_end = network_bytes_to_kind_and_offset(bytes)
        self.assertEqual('knit-delta-closure', kind)
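        # On the wire a record is a storage-kind label followed by the
        # payload; network_bytes_to_kind_and_offset() peels off that label
        # and reports where the body starts, which is all this test needs to
        # verify that the whole closure travelled as a single blob.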
 

class TestContentMapGenerator(KnitTests):
    """Tests for ContentMapGenerator"""

    def test_get_record_stream_gives_records(self):
        vf = self.make_test_knit(name='test')
        # put in three texts, giving ft, delta, delta
        vf.add_lines((b'base',), (), [b'base\n', b'content\n'])
        vf.add_lines((b'd1',), ((b'base',),), [b'd1\n'])
        vf.add_lines((b'd2',), ((b'd1',),), [b'd2\n'])
        keys = [(b'd1',), (b'd2',)]
        generator = _VFContentMapGenerator(vf, keys,
                                           global_map=vf.get_parent_map(keys))
        for record in generator.get_record_stream():
            if record.key == (b'd1',):
                self.assertEqual(b'd1\n', record.get_bytes_as('fulltext'))
            else:
                self.assertEqual(b'd2\n', record.get_bytes_as('fulltext'))

    def test_get_record_stream_kinds_are_raw(self):
        vf = self.make_test_knit(name='test')
        # put in three texts, giving ft, delta, delta
        vf.add_lines((b'base',), (), [b'base\n', b'content\n'])
        vf.add_lines((b'd1',), ((b'base',),), [b'd1\n'])
        vf.add_lines((b'd2',), ((b'd1',),), [b'd2\n'])
        keys = [(b'base',), (b'd1',), (b'd2',)]
        generator = _VFContentMapGenerator(vf, keys,
                                           global_map=vf.get_parent_map(keys))
        kinds = {(b'base',): 'knit-delta-closure',
                 (b'd1',): 'knit-delta-closure-ref',
                 (b'd2',): 'knit-delta-closure-ref',
                 }
        for record in generator.get_record_stream():
            self.assertEqual(kinds[record.key], record.storage_kind)
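        # Only the first record of a closure carries the actual byte payload
        # ('knit-delta-closure'); the remaining keys are emitted as
        # 'knit-delta-closure-ref' proxies that resolve against that shared
        # payload, so the closure is transmitted once however many keys it
        # covers.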