/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/tests/test_knit.py

  • Committer: Martin von Gagern
  • Date: 2010-04-20 08:47:38 UTC
  • mfrom: (5167 +trunk)
  • mto: This revision was merged to the branch mainline in revision 5195.
  • Revision ID: martin.vgagern@gmx.net-20100420084738-ygymnqmdllzrhpfn
merge trunk

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# Copyright (C) 2005, 2006, 2007 Canonical Ltd
 
1
# Copyright (C) 2006-2010 Canonical Ltd
2
2
#
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
12
12
#
13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
 
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
17
"""Tests for Knit data structure"""
18
18
 
28
28
    multiparent,
29
29
    osutils,
30
30
    pack,
 
31
    tests,
31
32
    )
32
33
from bzrlib.errors import (
33
34
    RevisionAlreadyPresent,
42
43
    KnitSequenceMatcher,
43
44
    KnitVersionedFiles,
44
45
    PlainKnitContent,
 
46
    _VFContentMapGenerator,
45
47
    _DirectPackAccess,
46
48
    _KndxIndex,
47
49
    _KnitGraphIndex,
48
50
    _KnitKeyAccess,
49
51
    make_file_factory,
50
52
    )
 
53
from bzrlib.repofmt import pack_repo
51
54
from bzrlib.tests import (
52
55
    Feature,
53
56
    KnownFailure,
54
57
    TestCase,
55
58
    TestCaseWithMemoryTransport,
56
59
    TestCaseWithTransport,
 
60
    TestNotApplicable,
57
61
    )
58
62
from bzrlib.transport import get_transport
59
63
from bzrlib.transport.memory import MemoryTransport
61
65
from bzrlib.versionedfile import (
62
66
    AbsentContentFactory,
63
67
    ConstantMapper,
 
68
    network_bytes_to_kind_and_offset,
64
69
    RecordingVersionedFilesDecorator,
65
70
    )
66
71
 
67
72
 
68
 
class _CompiledKnitFeature(Feature):
69
 
 
70
 
    def _probe(self):
71
 
        try:
72
 
            import bzrlib._knit_load_data_c
73
 
        except ImportError:
74
 
            return False
75
 
        return True
76
 
 
77
 
    def feature_name(self):
78
 
        return 'bzrlib._knit_load_data_c'
79
 
 
80
 
CompiledKnitFeature = _CompiledKnitFeature()
 
73
compiled_knit_feature = tests.ModuleAvailableFeature(
 
74
                            'bzrlib._knit_load_data_pyx')
81
75
 
82
76
 
83
77
class KnitContentTestsMixin(object):
269
263
        return queue_call
270
264
 
271
265
 
 
266
class MockReadvFailingTransport(MockTransport):
 
267
    """Fail in the middle of a readv() result.
 
268
 
 
269
    This Transport will successfully yield the first two requested hunks, but
 
270
    raise NoSuchFile for the rest.
 
271
    """
 
272
 
 
273
    def readv(self, relpath, offsets):
 
274
        count = 0
 
275
        for result in MockTransport.readv(self, relpath, offsets):
 
276
            count += 1
 
277
            # we use 2 because the first offset is the pack header, the second
 
278
            # is the first actual content request
 
279
            if count > 2:
 
280
                raise errors.NoSuchFile(relpath)
 
281
            yield result
 
282
 
 
283
 
272
284
class KnitRecordAccessTestsMixin(object):
273
285
    """Tests for getting and putting knit records."""
274
286
 
277
289
        access = self.get_access()
278
290
        memos = access.add_raw_records([('key', 10)], '1234567890')
279
291
        self.assertEqual(['1234567890'], list(access.get_raw_records(memos)))
280
 
 
 
292
 
281
293
    def test_add_several_raw_records(self):
282
294
        """add_raw_records with many records and read some back."""
283
295
        access = self.get_access()
303
315
        mapper = ConstantMapper("foo")
304
316
        access = _KnitKeyAccess(self.get_transport(), mapper)
305
317
        return access
306
 
    
 
318
 
 
319
 
 
320
class _TestException(Exception):
 
321
    """Just an exception for local tests to use."""
 
322
 
307
323
 
308
324
class TestPackKnitAccess(TestCaseWithMemoryTransport, KnitRecordAccessTestsMixin):
309
325
    """Tests for the pack based access."""
321
337
        access.set_writer(writer, index, (transport, packname))
322
338
        return access, writer
323
339
 
 
340
    def make_pack_file(self):
 
341
        """Create a pack file with 2 records."""
 
342
        access, writer = self._get_access(packname='packname', index='foo')
 
343
        memos = []
 
344
        memos.extend(access.add_raw_records([('key1', 10)], '1234567890'))
 
345
        memos.extend(access.add_raw_records([('key2', 5)], '12345'))
 
346
        writer.end()
 
347
        return memos
 
348
 
 
349
    def make_vf_for_retrying(self):
 
350
        """Create 3 packs and a reload function.
 
351
 
 
352
        Originally, 2 pack files will have the data, but one will be missing.
 
353
        And then the third will be used in place of the first two if reload()
 
354
        is called.
 
355
 
 
356
        :return: (versioned_file, reload_counter)
 
357
            versioned_file  a KnitVersionedFiles using the packs for access
 
358
        """
 
359
        builder = self.make_branch_builder('.', format="1.9")
 
360
        builder.start_series()
 
361
        builder.build_snapshot('rev-1', None, [
 
362
            ('add', ('', 'root-id', 'directory', None)),
 
363
            ('add', ('file', 'file-id', 'file', 'content\nrev 1\n')),
 
364
            ])
 
365
        builder.build_snapshot('rev-2', ['rev-1'], [
 
366
            ('modify', ('file-id', 'content\nrev 2\n')),
 
367
            ])
 
368
        builder.build_snapshot('rev-3', ['rev-2'], [
 
369
            ('modify', ('file-id', 'content\nrev 3\n')),
 
370
            ])
 
371
        builder.finish_series()
 
372
        b = builder.get_branch()
 
373
        b.lock_write()
 
374
        self.addCleanup(b.unlock)
 
375
        # Pack these three revisions into another pack file, but don't remove
 
376
        # the originals
 
377
        repo = b.repository
 
378
        collection = repo._pack_collection
 
379
        collection.ensure_loaded()
 
380
        orig_packs = collection.packs
 
381
        packer = pack_repo.Packer(collection, orig_packs, '.testpack')
 
382
        new_pack = packer.pack()
 
383
        # forget about the new pack
 
384
        collection.reset()
 
385
        repo.refresh_data()
 
386
        vf = repo.revisions
 
387
        # Set up a reload() function that switches to using the new pack file
 
388
        new_index = new_pack.revision_index
 
389
        access_tuple = new_pack.access_tuple()
 
390
        reload_counter = [0, 0, 0]
 
391
        def reload():
 
392
            reload_counter[0] += 1
 
393
            if reload_counter[1] > 0:
 
394
                # We already reloaded, nothing more to do
 
395
                reload_counter[2] += 1
 
396
                return False
 
397
            reload_counter[1] += 1
 
398
            vf._index._graph_index._indices[:] = [new_index]
 
399
            vf._access._indices.clear()
 
400
            vf._access._indices[new_index] = access_tuple
 
401
            return True
 
402
        # Delete one of the pack files so the data will need to be reloaded. We
 
403
        # will delete the file with 'rev-2' in it
 
404
        trans, name = orig_packs[1].access_tuple()
 
405
        trans.delete(name)
 
406
        # We don't have the index trigger reloading because we want to test
 
407
        # that we reload when the .pack disappears
 
408
        vf._access._reload_func = reload
 
409
        return vf, reload_counter
 
410
 
 
411
    def make_reload_func(self, return_val=True):
 
412
        reload_called = [0]
 
413
        def reload():
 
414
            reload_called[0] += 1
 
415
            return return_val
 
416
        return reload_called, reload
 
417
 
 
418
    def make_retry_exception(self):
 
419
        # We raise a real exception so that sys.exc_info() is properly
 
420
        # populated
 
421
        try:
 
422
            raise _TestException('foobar')
 
423
        except _TestException, e:
 
424
            retry_exc = errors.RetryWithNewPacks(None, reload_occurred=False,
 
425
                                                 exc_info=sys.exc_info())
 
426
        return retry_exc
 
427
 
324
428
    def test_read_from_several_packs(self):
325
429
        access, writer = self._get_access()
326
430
        memos = []
362
466
        writer.end()
363
467
        self.assertEqual(['1234567890'], list(access.get_raw_records(memos)))
364
468
 
 
469
    def test_missing_index_raises_retry(self):
 
470
        memos = self.make_pack_file()
 
471
        transport = self.get_transport()
 
472
        reload_called, reload_func = self.make_reload_func()
 
473
        # Note that the index key has changed from 'foo' to 'bar'
 
474
        access = _DirectPackAccess({'bar':(transport, 'packname')},
 
475
                                   reload_func=reload_func)
 
476
        e = self.assertListRaises(errors.RetryWithNewPacks,
 
477
                                  access.get_raw_records, memos)
 
478
        # Because a key was passed in which does not match our index list, we
 
479
        # assume that the listing was already reloaded
 
480
        self.assertTrue(e.reload_occurred)
 
481
        self.assertIsInstance(e.exc_info, tuple)
 
482
        self.assertIs(e.exc_info[0], KeyError)
 
483
        self.assertIsInstance(e.exc_info[1], KeyError)
 
484
 
 
485
    def test_missing_index_raises_key_error_with_no_reload(self):
 
486
        memos = self.make_pack_file()
 
487
        transport = self.get_transport()
 
488
        # Note that the index key has changed from 'foo' to 'bar'
 
489
        access = _DirectPackAccess({'bar':(transport, 'packname')})
 
490
        e = self.assertListRaises(KeyError, access.get_raw_records, memos)
 
491
 
 
492
    def test_missing_file_raises_retry(self):
 
493
        memos = self.make_pack_file()
 
494
        transport = self.get_transport()
 
495
        reload_called, reload_func = self.make_reload_func()
 
496
        # Note that the 'filename' has been changed to 'different-packname'
 
497
        access = _DirectPackAccess({'foo':(transport, 'different-packname')},
 
498
                                   reload_func=reload_func)
 
499
        e = self.assertListRaises(errors.RetryWithNewPacks,
 
500
                                  access.get_raw_records, memos)
 
501
        # The file has gone missing, so we assume we need to reload
 
502
        self.assertFalse(e.reload_occurred)
 
503
        self.assertIsInstance(e.exc_info, tuple)
 
504
        self.assertIs(e.exc_info[0], errors.NoSuchFile)
 
505
        self.assertIsInstance(e.exc_info[1], errors.NoSuchFile)
 
506
        self.assertEqual('different-packname', e.exc_info[1].path)
 
507
 
 
508
    def test_missing_file_raises_no_such_file_with_no_reload(self):
 
509
        memos = self.make_pack_file()
 
510
        transport = self.get_transport()
 
511
        # Note that the 'filename' has been changed to 'different-packname'
 
512
        access = _DirectPackAccess({'foo':(transport, 'different-packname')})
 
513
        e = self.assertListRaises(errors.NoSuchFile,
 
514
                                  access.get_raw_records, memos)
 
515
 
 
516
    def test_failing_readv_raises_retry(self):
 
517
        memos = self.make_pack_file()
 
518
        transport = self.get_transport()
 
519
        failing_transport = MockReadvFailingTransport(
 
520
                                [transport.get_bytes('packname')])
 
521
        reload_called, reload_func = self.make_reload_func()
 
522
        access = _DirectPackAccess({'foo':(failing_transport, 'packname')},
 
523
                                   reload_func=reload_func)
 
524
        # Asking for a single record will not trigger the Mock failure
 
525
        self.assertEqual(['1234567890'],
 
526
            list(access.get_raw_records(memos[:1])))
 
527
        self.assertEqual(['12345'],
 
528
            list(access.get_raw_records(memos[1:2])))
 
529
        # A multiple offset readv() will fail mid-way through
 
530
        e = self.assertListRaises(errors.RetryWithNewPacks,
 
531
                                  access.get_raw_records, memos)
 
532
        # The file has gone missing, so we assume we need to reload
 
533
        self.assertFalse(e.reload_occurred)
 
534
        self.assertIsInstance(e.exc_info, tuple)
 
535
        self.assertIs(e.exc_info[0], errors.NoSuchFile)
 
536
        self.assertIsInstance(e.exc_info[1], errors.NoSuchFile)
 
537
        self.assertEqual('packname', e.exc_info[1].path)
 
538
 
 
539
    def test_failing_readv_raises_no_such_file_with_no_reload(self):
 
540
        memos = self.make_pack_file()
 
541
        transport = self.get_transport()
 
542
        failing_transport = MockReadvFailingTransport(
 
543
                                [transport.get_bytes('packname')])
 
544
        reload_called, reload_func = self.make_reload_func()
 
545
        access = _DirectPackAccess({'foo':(failing_transport, 'packname')})
 
546
        # Asking for a single record will not trigger the Mock failure
 
547
        self.assertEqual(['1234567890'],
 
548
            list(access.get_raw_records(memos[:1])))
 
549
        self.assertEqual(['12345'],
 
550
            list(access.get_raw_records(memos[1:2])))
 
551
        # A multiple offset readv() will fail mid-way through
 
552
        e = self.assertListRaises(errors.NoSuchFile,
 
553
                                  access.get_raw_records, memos)
 
554
 
 
555
    def test_reload_or_raise_no_reload(self):
 
556
        access = _DirectPackAccess({}, reload_func=None)
 
557
        retry_exc = self.make_retry_exception()
 
558
        # Without a reload_func, we will just re-raise the original exception
 
559
        self.assertRaises(_TestException, access.reload_or_raise, retry_exc)
 
560
 
 
561
    def test_reload_or_raise_reload_changed(self):
 
562
        reload_called, reload_func = self.make_reload_func(return_val=True)
 
563
        access = _DirectPackAccess({}, reload_func=reload_func)
 
564
        retry_exc = self.make_retry_exception()
 
565
        access.reload_or_raise(retry_exc)
 
566
        self.assertEqual([1], reload_called)
 
567
        retry_exc.reload_occurred=True
 
568
        access.reload_or_raise(retry_exc)
 
569
        self.assertEqual([2], reload_called)
 
570
 
 
571
    def test_reload_or_raise_reload_no_change(self):
 
572
        reload_called, reload_func = self.make_reload_func(return_val=False)
 
573
        access = _DirectPackAccess({}, reload_func=reload_func)
 
574
        retry_exc = self.make_retry_exception()
 
575
        # If reload_occurred is False, then we consider it an error to have
 
576
        # reload_func() return False (no changes).
 
577
        self.assertRaises(_TestException, access.reload_or_raise, retry_exc)
 
578
        self.assertEqual([1], reload_called)
 
579
        retry_exc.reload_occurred=True
 
580
        # If reload_occurred is True, then we assume nothing changed because
 
581
        # it had changed earlier, but didn't change again
 
582
        access.reload_or_raise(retry_exc)
 
583
        self.assertEqual([2], reload_called)
 
584
 
 
585
    def test_annotate_retries(self):
 
586
        vf, reload_counter = self.make_vf_for_retrying()
 
587
        # It is a little bit bogus to annotate the Revision VF, but it works,
 
588
        # as we have ancestry stored there
 
589
        key = ('rev-3',)
 
590
        reload_lines = vf.annotate(key)
 
591
        self.assertEqual([1, 1, 0], reload_counter)
 
592
        plain_lines = vf.annotate(key)
 
593
        self.assertEqual([1, 1, 0], reload_counter) # No extra reloading
 
594
        if reload_lines != plain_lines:
 
595
            self.fail('Annotation was not identical with reloading.')
 
596
        # Now delete the packs-in-use, which should trigger another reload, but
 
597
        # this time we just raise an exception because we can't recover
 
598
        for trans, name in vf._access._indices.itervalues():
 
599
            trans.delete(name)
 
600
        self.assertRaises(errors.NoSuchFile, vf.annotate, key)
 
601
        self.assertEqual([2, 1, 1], reload_counter)
 
602
 
 
603
    def test__get_record_map_retries(self):
 
604
        vf, reload_counter = self.make_vf_for_retrying()
 
605
        keys = [('rev-1',), ('rev-2',), ('rev-3',)]
 
606
        records = vf._get_record_map(keys)
 
607
        self.assertEqual(keys, sorted(records.keys()))
 
608
        self.assertEqual([1, 1, 0], reload_counter)
 
609
        # Now delete the packs-in-use, which should trigger another reload, but
 
610
        # this time we just raise an exception because we can't recover
 
611
        for trans, name in vf._access._indices.itervalues():
 
612
            trans.delete(name)
 
613
        self.assertRaises(errors.NoSuchFile, vf._get_record_map, keys)
 
614
        self.assertEqual([2, 1, 1], reload_counter)
 
615
 
 
616
    def test_get_record_stream_retries(self):
 
617
        vf, reload_counter = self.make_vf_for_retrying()
 
618
        keys = [('rev-1',), ('rev-2',), ('rev-3',)]
 
619
        record_stream = vf.get_record_stream(keys, 'topological', False)
 
620
        record = record_stream.next()
 
621
        self.assertEqual(('rev-1',), record.key)
 
622
        self.assertEqual([0, 0, 0], reload_counter)
 
623
        record = record_stream.next()
 
624
        self.assertEqual(('rev-2',), record.key)
 
625
        self.assertEqual([1, 1, 0], reload_counter)
 
626
        record = record_stream.next()
 
627
        self.assertEqual(('rev-3',), record.key)
 
628
        self.assertEqual([1, 1, 0], reload_counter)
 
629
        # Now delete all pack files, and see that we raise the right error
 
630
        for trans, name in vf._access._indices.itervalues():
 
631
            trans.delete(name)
 
632
        self.assertListRaises(errors.NoSuchFile,
 
633
            vf.get_record_stream, keys, 'topological', False)
 
634
 
 
635
    def test_iter_lines_added_or_present_in_keys_retries(self):
 
636
        vf, reload_counter = self.make_vf_for_retrying()
 
637
        keys = [('rev-1',), ('rev-2',), ('rev-3',)]
 
638
        # Unfortunately, iter_lines_added_or_present_in_keys iterates the
 
639
        # result in random order (determined by the iteration order from a
 
640
        # set()), so we don't have any solid way to trigger whether data is
 
641
        # read before or after. However we tried to delete the middle node to
 
642
        # exercise the code well.
 
643
        # What we care about is that all lines are always yielded, but not
 
644
        # duplicated
 
645
        count = 0
 
646
        reload_lines = sorted(vf.iter_lines_added_or_present_in_keys(keys))
 
647
        self.assertEqual([1, 1, 0], reload_counter)
 
648
        # Now do it again, to make sure the result is equivalent
 
649
        plain_lines = sorted(vf.iter_lines_added_or_present_in_keys(keys))
 
650
        self.assertEqual([1, 1, 0], reload_counter) # No extra reloading
 
651
        self.assertEqual(plain_lines, reload_lines)
 
652
        self.assertEqual(21, len(plain_lines))
 
653
        # Now delete all pack files, and see that we raise the right error
 
654
        for trans, name in vf._access._indices.itervalues():
 
655
            trans.delete(name)
 
656
        self.assertListRaises(errors.NoSuchFile,
 
657
            vf.iter_lines_added_or_present_in_keys, keys)
 
658
        self.assertEqual([2, 1, 1], reload_counter)
 
659
 
 
660
    def test_get_record_stream_yields_disk_sorted_order(self):
 
661
        # if we get 'unordered' pick a semi-optimal order for reading. The
 
662
        # order should be grouped by pack file, and then by position in file
 
663
        repo = self.make_repository('test', format='pack-0.92')
 
664
        repo.lock_write()
 
665
        self.addCleanup(repo.unlock)
 
666
        repo.start_write_group()
 
667
        vf = repo.texts
 
668
        vf.add_lines(('f-id', 'rev-5'), [('f-id', 'rev-4')], ['lines\n'])
 
669
        vf.add_lines(('f-id', 'rev-1'), [], ['lines\n'])
 
670
        vf.add_lines(('f-id', 'rev-2'), [('f-id', 'rev-1')], ['lines\n'])
 
671
        repo.commit_write_group()
 
672
        # We inserted them as rev-5, rev-1, rev-2; we should get them back in
 
673
        # the same order
 
674
        stream = vf.get_record_stream([('f-id', 'rev-1'), ('f-id', 'rev-5'),
 
675
                                       ('f-id', 'rev-2')], 'unordered', False)
 
676
        keys = [r.key for r in stream]
 
677
        self.assertEqual([('f-id', 'rev-5'), ('f-id', 'rev-1'),
 
678
                          ('f-id', 'rev-2')], keys)
 
679
        repo.start_write_group()
 
680
        vf.add_lines(('f-id', 'rev-4'), [('f-id', 'rev-3')], ['lines\n'])
 
681
        vf.add_lines(('f-id', 'rev-3'), [('f-id', 'rev-2')], ['lines\n'])
 
682
        vf.add_lines(('f-id', 'rev-6'), [('f-id', 'rev-5')], ['lines\n'])
 
683
        repo.commit_write_group()
 
684
        # Request in random order, to make sure the output order isn't based on
 
685
        # the request
 
686
        request_keys = set(('f-id', 'rev-%d' % i) for i in range(1, 7))
 
687
        stream = vf.get_record_stream(request_keys, 'unordered', False)
 
688
        keys = [r.key for r in stream]
 
689
        # We want to get the keys back in disk order, but it doesn't matter
 
690
        # which pack we read from first. So this can come back in 2 orders
 
691
        alt1 = [('f-id', 'rev-%d' % i) for i in [4, 3, 6, 5, 1, 2]]
 
692
        alt2 = [('f-id', 'rev-%d' % i) for i in [5, 1, 2, 4, 3, 6]]
 
693
        if keys != alt1 and keys != alt2:
 
694
            self.fail('Returned key order did not match either expected order.'
 
695
                      ' expected %s or %s, not %s'
 
696
                      % (alt1, alt2, keys))
 
697
 
365
698
 
366
699
class LowLevelKnitDataTests(TestCase):
367
700
 
372
705
        gz_file.close()
373
706
        return sio.getvalue()
374
707
 
 
708
    def make_multiple_records(self):
 
709
        """Create the content for multiple records."""
 
710
        sha1sum = osutils.sha('foo\nbar\n').hexdigest()
 
711
        total_txt = []
 
712
        gz_txt = self.create_gz_content('version rev-id-1 2 %s\n'
 
713
                                        'foo\n'
 
714
                                        'bar\n'
 
715
                                        'end rev-id-1\n'
 
716
                                        % (sha1sum,))
 
717
        record_1 = (0, len(gz_txt), sha1sum)
 
718
        total_txt.append(gz_txt)
 
719
        sha1sum = osutils.sha('baz\n').hexdigest()
 
720
        gz_txt = self.create_gz_content('version rev-id-2 1 %s\n'
 
721
                                        'baz\n'
 
722
                                        'end rev-id-2\n'
 
723
                                        % (sha1sum,))
 
724
        record_2 = (record_1[1], len(gz_txt), sha1sum)
 
725
        total_txt.append(gz_txt)
 
726
        return total_txt, record_1, record_2
 
727
 
375
728
    def test_valid_knit_data(self):
376
729
        sha1sum = osutils.sha('foo\nbar\n').hexdigest()
377
730
        gz_txt = self.create_gz_content('version rev-id-1 2 %s\n'
391
744
        raw_contents = list(knit._read_records_iter_raw(records))
392
745
        self.assertEqual([(('rev-id-1',), gz_txt, sha1sum)], raw_contents)
393
746
 
 
747
    def test_multiple_records_valid(self):
 
748
        total_txt, record_1, record_2 = self.make_multiple_records()
 
749
        transport = MockTransport([''.join(total_txt)])
 
750
        access = _KnitKeyAccess(transport, ConstantMapper('filename'))
 
751
        knit = KnitVersionedFiles(None, access)
 
752
        records = [(('rev-id-1',), (('rev-id-1',), record_1[0], record_1[1])),
 
753
                   (('rev-id-2',), (('rev-id-2',), record_2[0], record_2[1]))]
 
754
 
 
755
        contents = list(knit._read_records_iter(records))
 
756
        self.assertEqual([(('rev-id-1',), ['foo\n', 'bar\n'], record_1[2]),
 
757
                          (('rev-id-2',), ['baz\n'], record_2[2])],
 
758
                         contents)
 
759
 
 
760
        raw_contents = list(knit._read_records_iter_raw(records))
 
761
        self.assertEqual([(('rev-id-1',), total_txt[0], record_1[2]),
 
762
                          (('rev-id-2',), total_txt[1], record_2[2])],
 
763
                         raw_contents)
 
764
 
394
765
    def test_not_enough_lines(self):
395
766
        sha1sum = osutils.sha('foo\n').hexdigest()
396
767
        # record says 2 lines, data says 1
491
862
 
492
863
    def get_knit_index(self, transport, name, mode):
493
864
        mapper = ConstantMapper(name)
494
 
        orig = knit._load_data
495
 
        def reset():
496
 
            knit._load_data = orig
497
 
        self.addCleanup(reset)
498
865
        from bzrlib._knit_load_data_py import _load_data_py
499
 
        knit._load_data = _load_data_py
 
866
        self.overrideAttr(knit, '_load_data', _load_data_py)
500
867
        allow_writes = lambda: 'w' in mode
501
868
        return _KndxIndex(transport, mapper, lambda:None, allow_writes, lambda:True)
502
869
 
718
1085
            call[1][1].getvalue())
719
1086
        self.assertEqual({'create_parent_dir': True}, call[2])
720
1087
 
 
1088
    def assertTotalBuildSize(self, size, keys, positions):
 
1089
        self.assertEqual(size,
 
1090
                         knit._get_total_build_size(None, keys, positions))
 
1091
 
 
1092
    def test__get_total_build_size(self):
 
1093
        positions = {
 
1094
            ('a',): (('fulltext', False), (('a',), 0, 100), None),
 
1095
            ('b',): (('line-delta', False), (('b',), 100, 21), ('a',)),
 
1096
            ('c',): (('line-delta', False), (('c',), 121, 35), ('b',)),
 
1097
            ('d',): (('line-delta', False), (('d',), 156, 12), ('b',)),
 
1098
            }
 
1099
        self.assertTotalBuildSize(100, [('a',)], positions)
 
1100
        self.assertTotalBuildSize(121, [('b',)], positions)
 
1101
        # c needs both a & b
 
1102
        self.assertTotalBuildSize(156, [('c',)], positions)
 
1103
        # we shouldn't count 'b' twice
 
1104
        self.assertTotalBuildSize(156, [('b',), ('c',)], positions)
 
1105
        self.assertTotalBuildSize(133, [('d',)], positions)
 
1106
        self.assertTotalBuildSize(168, [('c',), ('d',)], positions)
 
1107
 
721
1108
    def test_get_position(self):
722
1109
        transport = MockTransport([
723
1110
            _KndxIndex.HEADER,
864
1251
            else:
865
1252
                raise
866
1253
 
 
1254
    def test_scan_unvalidated_index_not_implemented(self):
 
1255
        transport = MockTransport()
 
1256
        index = self.get_knit_index(transport, 'filename', 'r')
 
1257
        self.assertRaises(
 
1258
            NotImplementedError, index.scan_unvalidated_index,
 
1259
            'dummy graph_index')
 
1260
        self.assertRaises(
 
1261
            NotImplementedError, index.get_missing_compression_parents)
 
1262
 
867
1263
    def test_short_line(self):
868
1264
        transport = MockTransport([
869
1265
            _KndxIndex.HEADER,
898
1294
 
899
1295
class LowLevelKnitIndexTests_c(LowLevelKnitIndexTests):
900
1296
 
901
 
    _test_needs_features = [CompiledKnitFeature]
 
1297
    _test_needs_features = [compiled_knit_feature]
902
1298
 
903
1299
    def get_knit_index(self, transport, name, mode):
904
1300
        mapper = ConstantMapper(name)
905
 
        orig = knit._load_data
906
 
        def reset():
907
 
            knit._load_data = orig
908
 
        self.addCleanup(reset)
909
 
        from bzrlib._knit_load_data_c import _load_data_c
910
 
        knit._load_data = _load_data_c
 
1301
        from bzrlib._knit_load_data_pyx import _load_data_c
 
1302
        self.overrideAttr(knit, '_load_data', _load_data_c)
911
1303
        allow_writes = lambda: mode == 'w'
912
 
        return _KndxIndex(transport, mapper, lambda:None, allow_writes, lambda:True)
 
1304
        return _KndxIndex(transport, mapper, lambda:None,
 
1305
                          allow_writes, lambda:True)
 
1306
 
 
1307
 
 
1308
class Test_KnitAnnotator(TestCaseWithMemoryTransport):
 
1309
 
 
1310
    def make_annotator(self):
 
1311
        factory = knit.make_pack_factory(True, True, 1)
 
1312
        vf = factory(self.get_transport())
 
1313
        return knit._KnitAnnotator(vf)
 
1314
 
 
1315
    def test__expand_fulltext(self):
 
1316
        ann = self.make_annotator()
 
1317
        rev_key = ('rev-id',)
 
1318
        ann._num_compression_children[rev_key] = 1
 
1319
        res = ann._expand_record(rev_key, (('parent-id',),), None,
 
1320
                           ['line1\n', 'line2\n'], ('fulltext', True))
 
1321
        # The content object and text lines should be cached appropriately
 
1322
        self.assertEqual(['line1\n', 'line2'], res)
 
1323
        content_obj = ann._content_objects[rev_key]
 
1324
        self.assertEqual(['line1\n', 'line2\n'], content_obj._lines)
 
1325
        self.assertEqual(res, content_obj.text())
 
1326
        self.assertEqual(res, ann._text_cache[rev_key])
 
1327
 
 
1328
    def test__expand_delta_comp_parent_not_available(self):
 
1329
        # Parent isn't available yet, so we return nothing, but queue up this
 
1330
        # node for later processing
 
1331
        ann = self.make_annotator()
 
1332
        rev_key = ('rev-id',)
 
1333
        parent_key = ('parent-id',)
 
1334
        record = ['0,1,1\n', 'new-line\n']
 
1335
        details = ('line-delta', False)
 
1336
        res = ann._expand_record(rev_key, (parent_key,), parent_key,
 
1337
                                 record, details)
 
1338
        self.assertEqual(None, res)
 
1339
        self.assertTrue(parent_key in ann._pending_deltas)
 
1340
        pending = ann._pending_deltas[parent_key]
 
1341
        self.assertEqual(1, len(pending))
 
1342
        self.assertEqual((rev_key, (parent_key,), record, details), pending[0])
 
1343
 
 
1344
    def test__expand_record_tracks_num_children(self):
 
1345
        ann = self.make_annotator()
 
1346
        rev_key = ('rev-id',)
 
1347
        rev2_key = ('rev2-id',)
 
1348
        parent_key = ('parent-id',)
 
1349
        record = ['0,1,1\n', 'new-line\n']
 
1350
        details = ('line-delta', False)
 
1351
        ann._num_compression_children[parent_key] = 2
 
1352
        ann._expand_record(parent_key, (), None, ['line1\n', 'line2\n'],
 
1353
                           ('fulltext', False))
 
1354
        res = ann._expand_record(rev_key, (parent_key,), parent_key,
 
1355
                                 record, details)
 
1356
        self.assertEqual({parent_key: 1}, ann._num_compression_children)
 
1357
        # Expanding the second child should remove the content object, and the
 
1358
        # num_compression_children entry
 
1359
        res = ann._expand_record(rev2_key, (parent_key,), parent_key,
 
1360
                                 record, details)
 
1361
        self.assertFalse(parent_key in ann._content_objects)
 
1362
        self.assertEqual({}, ann._num_compression_children)
 
1363
        # We should not cache the content_objects for rev2 and rev, because
 
1364
        # they do not have compression children of their own.
 
1365
        self.assertEqual({}, ann._content_objects)
 
1366
 
 
1367
    def test__expand_delta_records_blocks(self):
 
1368
        ann = self.make_annotator()
 
1369
        rev_key = ('rev-id',)
 
1370
        parent_key = ('parent-id',)
 
1371
        record = ['0,1,1\n', 'new-line\n']
 
1372
        details = ('line-delta', True)
 
1373
        ann._num_compression_children[parent_key] = 2
 
1374
        ann._expand_record(parent_key, (), None,
 
1375
                           ['line1\n', 'line2\n', 'line3\n'],
 
1376
                           ('fulltext', False))
 
1377
        ann._expand_record(rev_key, (parent_key,), parent_key, record, details)
 
1378
        self.assertEqual({(rev_key, parent_key): [(1, 1, 1), (3, 3, 0)]},
 
1379
                         ann._matching_blocks)
 
1380
        rev2_key = ('rev2-id',)
 
1381
        record = ['0,1,1\n', 'new-line\n']
 
1382
        details = ('line-delta', False)
 
1383
        ann._expand_record(rev2_key, (parent_key,), parent_key, record, details)
 
1384
        self.assertEqual([(1, 1, 2), (3, 3, 0)],
 
1385
                         ann._matching_blocks[(rev2_key, parent_key)])
 
1386
 
 
1387
    def test__get_parent_ann_uses_matching_blocks(self):
 
1388
        ann = self.make_annotator()
 
1389
        rev_key = ('rev-id',)
 
1390
        parent_key = ('parent-id',)
 
1391
        parent_ann = [(parent_key,)]*3
 
1392
        block_key = (rev_key, parent_key)
 
1393
        ann._annotations_cache[parent_key] = parent_ann
 
1394
        ann._matching_blocks[block_key] = [(0, 1, 1), (3, 3, 0)]
 
1395
        # We should not try to access any parent_lines content, because we know
 
1396
        # we already have the matching blocks
 
1397
        par_ann, blocks = ann._get_parent_annotations_and_matches(rev_key,
 
1398
                                        ['1\n', '2\n', '3\n'], parent_key)
 
1399
        self.assertEqual(parent_ann, par_ann)
 
1400
        self.assertEqual([(0, 1, 1), (3, 3, 0)], blocks)
 
1401
        self.assertEqual({}, ann._matching_blocks)
 
1402
 
 
1403
    def test__process_pending(self):
 
1404
        ann = self.make_annotator()
 
1405
        rev_key = ('rev-id',)
 
1406
        p1_key = ('p1-id',)
 
1407
        p2_key = ('p2-id',)
 
1408
        record = ['0,1,1\n', 'new-line\n']
 
1409
        details = ('line-delta', False)
 
1410
        p1_record = ['line1\n', 'line2\n']
 
1411
        ann._num_compression_children[p1_key] = 1
 
1412
        res = ann._expand_record(rev_key, (p1_key,p2_key), p1_key,
 
1413
                                 record, details)
 
1414
        self.assertEqual(None, res)
 
1415
        # self.assertTrue(p1_key in ann._pending_deltas)
 
1416
        self.assertEqual({}, ann._pending_annotation)
 
1417
        # Now insert p1, and we should be able to expand the delta
 
1418
        res = ann._expand_record(p1_key, (), None, p1_record,
 
1419
                                 ('fulltext', False))
 
1420
        self.assertEqual(p1_record, res)
 
1421
        ann._annotations_cache[p1_key] = [(p1_key,)]*2
 
1422
        res = ann._process_pending(p1_key)
 
1423
        self.assertEqual([], res)
 
1424
        self.assertFalse(p1_key in ann._pending_deltas)
 
1425
        self.assertTrue(p2_key in ann._pending_annotation)
 
1426
        self.assertEqual({p2_key: [(rev_key, (p1_key, p2_key))]},
 
1427
                         ann._pending_annotation)
 
1428
        # Now fill in parent 2, and pending annotation should be satisfied
 
1429
        res = ann._expand_record(p2_key, (), None, [], ('fulltext', False))
 
1430
        ann._annotations_cache[p2_key] = []
 
1431
        res = ann._process_pending(p2_key)
 
1432
        self.assertEqual([rev_key], res)
 
1433
        self.assertEqual({}, ann._pending_annotation)
 
1434
        self.assertEqual({}, ann._pending_deltas)
 
1435
 
 
1436
    def test_record_delta_removes_basis(self):
 
1437
        ann = self.make_annotator()
 
1438
        ann._expand_record(('parent-id',), (), None,
 
1439
                           ['line1\n', 'line2\n'], ('fulltext', False))
 
1440
        ann._num_compression_children['parent-id'] = 2
 
1441
 
 
1442
    def test_annotate_special_text(self):
 
1443
        ann = self.make_annotator()
 
1444
        vf = ann._vf
 
1445
        rev1_key = ('rev-1',)
 
1446
        rev2_key = ('rev-2',)
 
1447
        rev3_key = ('rev-3',)
 
1448
        spec_key = ('special:',)
 
1449
        vf.add_lines(rev1_key, [], ['initial content\n'])
 
1450
        vf.add_lines(rev2_key, [rev1_key], ['initial content\n',
 
1451
                                            'common content\n',
 
1452
                                            'content in 2\n'])
 
1453
        vf.add_lines(rev3_key, [rev1_key], ['initial content\n',
 
1454
                                            'common content\n',
 
1455
                                            'content in 3\n'])
 
1456
        spec_text = ('initial content\n'
 
1457
                     'common content\n'
 
1458
                     'content in 2\n'
 
1459
                     'content in 3\n')
 
1460
        ann.add_special_text(spec_key, [rev2_key, rev3_key], spec_text)
 
1461
        anns, lines = ann.annotate(spec_key)
 
1462
        self.assertEqual([(rev1_key,),
 
1463
                          (rev2_key, rev3_key),
 
1464
                          (rev2_key,),
 
1465
                          (rev3_key,),
 
1466
                         ], anns)
 
1467
        self.assertEqualDiff(spec_text, ''.join(lines))
913
1468
 
914
1469
 
915
1470
class KnitTests(TestCaseWithTransport):
920
1475
        return make_file_factory(annotate, mapper)(self.get_transport())
921
1476
 
922
1477
 
 
1478
class TestBadShaError(KnitTests):
    """Checks on the error raised when a reconstructed text is corrupt."""

    def test_sha_exception_has_text(self):
        """KnitCorrupt carries the wrongly reconstructed text.

        Having the failed text included in the error allows for recovery.
        """
        source = self.make_test_knit()
        target = self.make_test_knit(name="target")
        if not source._max_delta_chain:
            raise TestNotApplicable(
                "cannot get delta-caused sha failures without deltas.")
        basis_key = ('basis',)
        broken_key = ('broken',)
        # The source holds a good basis and a text built on top of it.
        source.add_lines(basis_key, (), ['foo\n'])
        source.add_lines(broken_key, (basis_key,), ['foo\n', 'bar\n'])
        # Seed the target with a bad basis text, then copy only the
        # dependent record across.
        target.add_lines(basis_key, (), ['gam\n'])
        copied = source.get_record_stream([broken_key], 'unordered', False)
        target.insert_record_stream(copied)
        stream = target.get_record_stream([broken_key], 'unordered', True)
        record = stream.next()
        err = self.assertRaises(
            errors.KnitCorrupt, record.get_bytes_as, 'chunked')
        self.assertEqual(['gam\n', 'bar\n'], err.content)
        # Test for formatting with live data
        self.assertStartsWith(str(err), "Knit ")
 
1503
 
 
1504
 
923
1505
class TestKnitIndex(KnitTests):
924
1506
 
925
1507
    def test_add_versions_dictionary_compresses(self):
1125
1707
            [('parent',)])])
1126
1708
        # but neither should have added data:
1127
1709
        self.assertEqual([[], [], [], []], self.caught_entries)
1128
 
        
 
1710
 
1129
1711
    def test_add_version_different_dup(self):
1130
1712
        index = self.two_graph_index(deltas=True, catch_adds=True)
1131
1713
        # change options
1132
1714
        self.assertRaises(errors.KnitCorrupt, index.add_records,
1133
 
            [(('tip',), 'no-eol,line-delta', (None, 0, 100), [('parent',)])])
1134
 
        self.assertRaises(errors.KnitCorrupt, index.add_records,
1135
 
            [(('tip',), 'line-delta,no-eol', (None, 0, 100), [('parent',)])])
 
1715
            [(('tip',), 'line-delta', (None, 0, 100), [('parent',)])])
1136
1716
        self.assertRaises(errors.KnitCorrupt, index.add_records,
1137
1717
            [(('tip',), 'fulltext', (None, 0, 100), [('parent',)])])
1138
1718
        # parents
1139
1719
        self.assertRaises(errors.KnitCorrupt, index.add_records,
1140
1720
            [(('tip',), 'fulltext,no-eol', (None, 0, 100), [])])
1141
1721
        self.assertEqual([], self.caught_entries)
1142
 
        
 
1722
 
1143
1723
    def test_add_versions_nodeltas(self):
1144
1724
        index = self.two_graph_index(catch_adds=True)
1145
1725
        index.add_records([
1187
1767
            [('parent',)])])
1188
1768
        # but neither should have added data.
1189
1769
        self.assertEqual([[], [], [], []], self.caught_entries)
1190
 
        
 
1770
 
1191
1771
    def test_add_versions_different_dup(self):
1192
1772
        index = self.two_graph_index(deltas=True, catch_adds=True)
1193
1773
        # change options
1194
1774
        self.assertRaises(errors.KnitCorrupt, index.add_records,
1195
 
            [(('tip',), 'no-eol,line-delta', (None, 0, 100), [('parent',)])])
1196
 
        self.assertRaises(errors.KnitCorrupt, index.add_records,
1197
 
            [(('tip',), 'line-delta,no-eol', (None, 0, 100), [('parent',)])])
 
1775
            [(('tip',), 'line-delta', (None, 0, 100), [('parent',)])])
1198
1776
        self.assertRaises(errors.KnitCorrupt, index.add_records,
1199
1777
            [(('tip',), 'fulltext', (None, 0, 100), [('parent',)])])
1200
1778
        # parents
1203
1781
        # change options in the second record
1204
1782
        self.assertRaises(errors.KnitCorrupt, index.add_records,
1205
1783
            [(('tip',), 'fulltext,no-eol', (None, 0, 100), [('parent',)]),
1206
 
             (('tip',), 'no-eol,line-delta', (None, 0, 100), [('parent',)])])
 
1784
             (('tip',), 'line-delta', (None, 0, 100), [('parent',)])])
1207
1785
        self.assertEqual([], self.caught_entries)
1208
1786
 
 
1787
    def make_g_index_missing_compression_parent(self):
 
1788
        graph_index = self.make_g_index('missing_comp', 2,
 
1789
            [(('tip', ), ' 100 78',
 
1790
              ([('missing-parent', ), ('ghost', )], [('missing-parent', )]))])
 
1791
        return graph_index
 
1792
 
 
1793
    def make_g_index_missing_parent(self):
 
1794
        graph_index = self.make_g_index('missing_parent', 2,
 
1795
            [(('parent', ), ' 100 78', ([], [])),
 
1796
             (('tip', ), ' 100 78',
 
1797
              ([('parent', ), ('missing-parent', )], [('parent', )])),
 
1798
              ])
 
1799
        return graph_index
 
1800
 
 
1801
    def make_g_index_no_external_refs(self):
 
1802
        graph_index = self.make_g_index('no_external_refs', 2,
 
1803
            [(('rev', ), ' 100 78',
 
1804
              ([('parent', ), ('ghost', )], []))])
 
1805
        return graph_index
 
1806
 
 
1807
    def test_add_good_unvalidated_index(self):
 
1808
        unvalidated = self.make_g_index_no_external_refs()
 
1809
        combined = CombinedGraphIndex([unvalidated])
 
1810
        index = _KnitGraphIndex(combined, lambda: True, deltas=True)
 
1811
        index.scan_unvalidated_index(unvalidated)
 
1812
        self.assertEqual(frozenset(), index.get_missing_compression_parents())
 
1813
 
 
1814
    def test_add_missing_compression_parent_unvalidated_index(self):
 
1815
        unvalidated = self.make_g_index_missing_compression_parent()
 
1816
        combined = CombinedGraphIndex([unvalidated])
 
1817
        index = _KnitGraphIndex(combined, lambda: True, deltas=True)
 
1818
        index.scan_unvalidated_index(unvalidated)
 
1819
        # This also checks that its only the compression parent that is
 
1820
        # examined, otherwise 'ghost' would also be reported as a missing
 
1821
        # parent.
 
1822
        self.assertEqual(
 
1823
            frozenset([('missing-parent',)]),
 
1824
            index.get_missing_compression_parents())
 
1825
 
 
1826
    def test_add_missing_noncompression_parent_unvalidated_index(self):
 
1827
        unvalidated = self.make_g_index_missing_parent()
 
1828
        combined = CombinedGraphIndex([unvalidated])
 
1829
        index = _KnitGraphIndex(combined, lambda: True, deltas=True,
 
1830
            track_external_parent_refs=True)
 
1831
        index.scan_unvalidated_index(unvalidated)
 
1832
        self.assertEqual(
 
1833
            frozenset([('missing-parent',)]), index.get_missing_parents())
 
1834
 
 
1835
    def test_track_external_parent_refs(self):
 
1836
        g_index = self.make_g_index('empty', 2, [])
 
1837
        combined = CombinedGraphIndex([g_index])
 
1838
        index = _KnitGraphIndex(combined, lambda: True, deltas=True,
 
1839
            add_callback=self.catch_add, track_external_parent_refs=True)
 
1840
        self.caught_entries = []
 
1841
        index.add_records([
 
1842
            (('new-key',), 'fulltext,no-eol', (None, 50, 60),
 
1843
             [('parent-1',), ('parent-2',)])])
 
1844
        self.assertEqual(
 
1845
            frozenset([('parent-1',), ('parent-2',)]),
 
1846
            index.get_missing_parents())
 
1847
 
 
1848
    def test_add_unvalidated_index_with_present_external_references(self):
 
1849
        index = self.two_graph_index(deltas=True)
 
1850
        # Ugly hack to get at one of the underlying GraphIndex objects that
 
1851
        # two_graph_index built.
 
1852
        unvalidated = index._graph_index._indices[1]
 
1853
        # 'parent' is an external ref of _indices[1] (unvalidated), but is
 
1854
        # present in _indices[0].
 
1855
        index.scan_unvalidated_index(unvalidated)
 
1856
        self.assertEqual(frozenset(), index.get_missing_compression_parents())
 
1857
 
 
1858
    def make_new_missing_parent_g_index(self, name):
 
1859
        missing_parent = name + '-missing-parent'
 
1860
        graph_index = self.make_g_index(name, 2,
 
1861
            [((name + 'tip', ), ' 100 78',
 
1862
              ([(missing_parent, ), ('ghost', )], [(missing_parent, )]))])
 
1863
        return graph_index
 
1864
 
 
1865
    def test_add_mulitiple_unvalidated_indices_with_missing_parents(self):
 
1866
        g_index_1 = self.make_new_missing_parent_g_index('one')
 
1867
        g_index_2 = self.make_new_missing_parent_g_index('two')
 
1868
        combined = CombinedGraphIndex([g_index_1, g_index_2])
 
1869
        index = _KnitGraphIndex(combined, lambda: True, deltas=True)
 
1870
        index.scan_unvalidated_index(g_index_1)
 
1871
        index.scan_unvalidated_index(g_index_2)
 
1872
        self.assertEqual(
 
1873
            frozenset([('one-missing-parent',), ('two-missing-parent',)]),
 
1874
            index.get_missing_compression_parents())
 
1875
 
 
1876
    def test_add_mulitiple_unvalidated_indices_with_mutual_dependencies(self):
 
1877
        graph_index_a = self.make_g_index('one', 2,
 
1878
            [(('parent-one', ), ' 100 78', ([('non-compression-parent',)], [])),
 
1879
             (('child-of-two', ), ' 100 78',
 
1880
              ([('parent-two',)], [('parent-two',)]))])
 
1881
        graph_index_b = self.make_g_index('two', 2,
 
1882
            [(('parent-two', ), ' 100 78', ([('non-compression-parent',)], [])),
 
1883
             (('child-of-one', ), ' 100 78',
 
1884
              ([('parent-one',)], [('parent-one',)]))])
 
1885
        combined = CombinedGraphIndex([graph_index_a, graph_index_b])
 
1886
        index = _KnitGraphIndex(combined, lambda: True, deltas=True)
 
1887
        index.scan_unvalidated_index(graph_index_a)
 
1888
        index.scan_unvalidated_index(graph_index_b)
 
1889
        self.assertEqual(
 
1890
            frozenset([]), index.get_missing_compression_parents())
 
1891
 
1209
1892
 
1210
1893
class TestNoParentsGraphIndexKnit(KnitTests):
1211
1894
    """Tests for knits using _KnitGraphIndex with no parents."""
1219
1902
        size = trans.put_file(name, stream)
1220
1903
        return GraphIndex(trans, name, size)
1221
1904
 
 
1905
    def test_add_good_unvalidated_index(self):
 
1906
        unvalidated = self.make_g_index('unvalidated')
 
1907
        combined = CombinedGraphIndex([unvalidated])
 
1908
        index = _KnitGraphIndex(combined, lambda: True, parents=False)
 
1909
        index.scan_unvalidated_index(unvalidated)
 
1910
        self.assertEqual(frozenset(),
 
1911
            index.get_missing_compression_parents())
 
1912
 
1222
1913
    def test_parents_deltas_incompatible(self):
1223
1914
        index = CombinedGraphIndex([])
1224
1915
        self.assertRaises(errors.KnitError, _KnitGraphIndex, lambda:True,
1305
1996
        index.add_records([(('tip',), 'fulltext,no-eol', (None, 0, 1000), [])])
1306
1997
        # but neither should have added data.
1307
1998
        self.assertEqual([[], [], [], []], self.caught_entries)
1308
 
        
 
1999
 
1309
2000
    def test_add_version_different_dup(self):
1310
2001
        index = self.two_graph_index(catch_adds=True)
1311
2002
        # change options
1319
2010
        self.assertRaises(errors.KnitCorrupt, index.add_records,
1320
2011
            [(('tip',), 'fulltext,no-eol', (None, 0, 100), [('parent',)])])
1321
2012
        self.assertEqual([], self.caught_entries)
1322
 
        
 
2013
 
1323
2014
    def test_add_versions(self):
1324
2015
        index = self.two_graph_index(catch_adds=True)
1325
2016
        index.add_records([
1357
2048
        index.add_records([(('tip',), 'fulltext,no-eol', (None, 0, 1000), [])])
1358
2049
        # but neither should have added data.
1359
2050
        self.assertEqual([[], [], [], []], self.caught_entries)
1360
 
        
 
2051
 
1361
2052
    def test_add_versions_different_dup(self):
1362
2053
        index = self.two_graph_index(catch_adds=True)
1363
2054
        # change options
1377
2068
        self.assertEqual([], self.caught_entries)
1378
2069
 
1379
2070
 
 
2071
class TestKnitVersionedFiles(KnitTests):
    """Tests for KnitVersionedFiles._group_keys_for_io and _split_by_prefix."""

    def assertGroupKeysForIo(self, exp_groups, keys, non_local_keys,
                             positions, _min_buffer_size=None):
        """Assert that a fresh test knit groups `keys` as `exp_groups`.

        :param exp_groups: the expected return value of _group_keys_for_io.
        :param keys: passed through as the keys argument.
        :param non_local_keys: passed through unchanged.
        :param positions: passed through unchanged.
        :param _min_buffer_size: passed through; when None,
            knit._STREAM_MIN_BUFFER_SIZE is used instead.
        """
        if _min_buffer_size is None:
            buffer_size = knit._STREAM_MIN_BUFFER_SIZE
        else:
            buffer_size = _min_buffer_size
        versioned_files = self.make_test_knit()
        actual_groups = versioned_files._group_keys_for_io(
            keys, non_local_keys, positions, _min_buffer_size=buffer_size)
        self.assertEqual(exp_groups, actual_groups)

    def assertSplitByPrefix(self, expected_map, expected_prefix_order,
                            keys):
        """Assert the (map, prefix order) pair _split_by_prefix returns.

        :param expected_map: expected dict of prefix to list of keys.
        :param expected_prefix_order: expected list of prefixes.
        :param keys: the keys handed to _split_by_prefix.
        """
        result = KnitVersionedFiles._split_by_prefix(keys)
        actual_map, actual_prefix_order = result
        self.assertEqual(expected_map, actual_map)
        self.assertEqual(expected_prefix_order, actual_prefix_order)

    def test__group_keys_for_io(self):
        """Check the grouping of keys for several minimum buffer sizes."""
        fulltext = ('fulltext', False)
        line_delta = ('line-delta', False)
        f_a, f_b, f_c = ('f', 'a'), ('f', 'b'), ('f', 'c')
        g_a, g_b, g_c = ('g', 'a'), ('g', 'b'), ('g', 'c')
        positions = {
            f_a: (fulltext, (f_a, 0, 100), None),
            f_b: (line_delta, (f_b, 100, 21), f_a),
            f_c: (line_delta, (f_c, 180, 15), f_b),
            g_a: (fulltext, (g_a, 121, 35), None),
            g_b: (line_delta, (g_b, 156, 12), g_a),
            g_c: (line_delta, (g_c, 195, 13), g_a),
            }
        interleaved = [f_a, g_a, f_b, g_b]
        # Each case is (expected groups, keys, non-local keys, minimum
        # buffer size); a size of None selects the helper's default.
        cases = [
            ([([f_a], set())], [f_a], [], None),
            ([([f_a], set([f_a]))], [f_a], [f_a], None),
            ([([f_a, f_b], set([]))], [f_a, f_b], [], None),
            ([([f_a, f_b], set([f_b]))], [f_a, f_b], [f_b], None),
            ([([f_a, f_b, g_a, g_b], set())], interleaved, [], None),
            ([([f_a, f_b, g_a, g_b], set())], interleaved, [], 150),
            ([([f_a, f_b], set()), ([g_a, g_b], set())],
             interleaved, [], 100),
            ([([f_c], set()), ([g_b], set())], [f_c, g_b], [], 125),
            ([([g_b, f_c], set())], [g_b, f_c], [], 125),
            ]
        for expected, keys, non_local_keys, min_size in cases:
            self.assertGroupKeysForIo(expected, keys, non_local_keys,
                                      positions, _min_buffer_size=min_size)

    def test__split_by_prefix(self):
        """Check prefix splitting for two-element and one-element keys."""
        f_a, f_b = ('f', 'a'), ('f', 'b')
        g_a, g_b = ('g', 'a'), ('g', 'b')
        two_prefix_map = {'f': [f_a, f_b], 'g': [g_b, g_a]}
        # Interleaved input still yields per-prefix lists in input order.
        self.assertSplitByPrefix(two_prefix_map, ['f', 'g'],
                                 [f_a, g_b, g_a, f_b])
        # The original repeats this already-grouped case twice; both calls
        # are kept.
        for _ in range(2):
            self.assertSplitByPrefix(two_prefix_map, ['f', 'g'],
                                     [f_a, f_b, g_b, g_a])
        # One-element keys are collected under the empty-string prefix.
        with_bare_keys = {
            'f': [f_a, f_b],
            'g': [g_b, g_a],
            '': [('a',), ('b',)],
            }
        self.assertSplitByPrefix(with_bare_keys, ['f', 'g', ''],
                                 [f_a, g_b, ('a',), ('b',), g_a, f_b])
 
2154
 
 
2155
 
1380
2156
class TestStacking(KnitTests):
1381
2157
 
1382
2158
    def get_basis_and_test_knit(self):
1408
2184
        basis.calls = []
1409
2185
        test.add_lines(key_cross_border, (key_basis,), ['foo\n'])
1410
2186
        self.assertEqual('fulltext', test._index.get_method(key_cross_border))
1411
 
        self.assertEqual([("get_parent_map", set([key_basis]))], basis.calls)
 
2187
        # we don't even need to look at the basis to see that this should be
 
2188
        # stored as a fulltext
 
2189
        self.assertEqual([], basis.calls)
1412
2190
        # Subsequent adds do delta.
1413
2191
        basis.calls = []
1414
2192
        test.add_lines(key_delta, (key_cross_border,), ['foo\n'])
1435
2213
        # self.assertEqual([("annotate", key_basis)], basis.calls)
1436
2214
        self.assertEqual([('get_parent_map', set([key_basis])),
1437
2215
            ('get_parent_map', set([key_basis])),
1438
 
            ('get_parent_map', set([key_basis])),
1439
 
            ('get_record_stream', [key_basis], 'unordered', True)],
 
2216
            ('get_record_stream', [key_basis], 'topological', True)],
1440
2217
            basis.calls)
1441
2218
 
1442
2219
    def test_check(self):
1443
2220
        # At the moment checking a stacked knit does implicitly check the
1444
 
        # fallback files.  
 
2221
        # fallback files.
1445
2222
        basis, test = self.get_basis_and_test_knit()
1446
2223
        test.check()
1447
2224
 
1539
2316
                True).next()
1540
2317
            self.assertEqual(record.key, result[0])
1541
2318
            self.assertEqual(record.sha1, result[1])
1542
 
            self.assertEqual(record.storage_kind, result[2])
 
2319
            # We used to check that the storage kind matched, but actually it
 
2320
            # depends on whether it was sourced from the basis, or in a single
 
2321
            # group, because asking for full texts returns proxy objects to a
 
2322
            # _ContentMapGenerator object; so checking the kind is unneeded.
1543
2323
            self.assertEqual(record.get_bytes_as('fulltext'), result[3])
1544
2324
        # It's not strictly minimal, but it seems reasonable for now for it to
1545
2325
        # ask which fallbacks have which parents.
1546
2326
        self.assertEqual([
1547
2327
            ("get_parent_map", set([key_basis, key_basis_2, key_missing])),
1548
 
            # unordered is asked for by the underlying worker as it still
1549
 
            # buffers everything while answering - which is a problem!
1550
 
            ("get_record_stream", [key_basis_2, key_basis], 'unordered', True)],
 
2328
            # topological is requested from the fallback, because that is what
 
2329
            # was requested at the top level.
 
2330
            ("get_record_stream", [key_basis_2, key_basis], 'topological', True)],
1551
2331
            calls)
1552
2332
 
1553
2333
    def test_get_record_stream_unordered_deltas(self):
1669
2449
        source.add_lines(key_delta, (key_basis,), ['bar\n'])
1670
2450
        stream = source.get_record_stream([key_delta], 'unordered', False)
1671
2451
        test.insert_record_stream(stream)
1672
 
        self.assertEqual([("get_parent_map", set([key_basis]))],
 
2452
        # XXX: this does somewhat too many calls in making sure of whether it
 
2453
        # has to recreate the full text.
 
2454
        self.assertEqual([("get_parent_map", set([key_basis])),
 
2455
             ('get_parent_map', set([key_basis])),
 
2456
             ('get_record_stream', [key_basis], 'unordered', True)],
1673
2457
            basis.calls)
1674
2458
        self.assertEqual({key_delta:(key_basis,)},
1675
2459
            test.get_parent_map([key_delta]))
1678
2462
 
1679
2463
    def test_iter_lines_added_or_present_in_keys(self):
1680
2464
        # Lines from the basis are returned, and lines for a given key are only
1681
 
        # returned once. 
 
2465
        # returned once.
1682
2466
        key1 = ('foo1',)
1683
2467
        key2 = ('foo2',)
1684
2468
        # all sources are asked for keys:
1736
2520
        test.add_mpdiffs([(key_delta, (key_basis,),
1737
2521
            source.get_sha1s([key_delta])[key_delta], diffs[0])])
1738
2522
        self.assertEqual([("get_parent_map", set([key_basis])),
1739
 
            ('get_record_stream', [key_basis], 'unordered', True),
1740
 
            ('get_parent_map', set([key_basis]))],
 
2523
            ('get_record_stream', [key_basis], 'unordered', True),],
1741
2524
            basis.calls)
1742
2525
        self.assertEqual({key_delta:(key_basis,)},
1743
2526
            test.get_parent_map([key_delta]))
1762
2545
                multiparent.NewText(['foo\n']),
1763
2546
                multiparent.ParentText(1, 0, 2, 1)])],
1764
2547
            diffs)
1765
 
        self.assertEqual(4, len(basis.calls))
 
2548
        self.assertEqual(3, len(basis.calls))
1766
2549
        self.assertEqual([
1767
2550
            ("get_parent_map", set([key_left, key_right])),
1768
2551
            ("get_parent_map", set([key_left, key_right])),
1769
 
            ("get_parent_map", set([key_left, key_right])),
1770
2552
            ],
1771
 
            basis.calls[:3])
1772
 
        last_call = basis.calls[3]
 
2553
            basis.calls[:-1])
 
2554
        last_call = basis.calls[-1]
1773
2555
        self.assertEqual('get_record_stream', last_call[0])
1774
2556
        self.assertEqual(set([key_left, key_right]), set(last_call[1]))
1775
 
        self.assertEqual('unordered', last_call[2])
 
2557
        self.assertEqual('topological', last_call[2])
1776
2558
        self.assertEqual(True, last_call[3])
 
2559
 
 
2560
 
 
2561
class TestNetworkBehaviour(KnitTests):
    """Tests for getting data out of/into knits over the network."""

    def test_include_delta_closure_generates_a_knit_delta_closure(self):
        """Streaming deltas with include_delta_closure=True yields a
        'knit-delta-closure' record on the wire.
        """
        base_key = ('base',)
        d1_key = ('d1',)
        d2_key = ('d2',)
        vf = self.make_test_knit(name='test')
        # put in three texts, giving ft, delta, delta
        vf.add_lines(base_key, (), ['base\n', 'content\n'])
        vf.add_lines(d1_key, (base_key,), ['d1\n'])
        vf.add_lines(d2_key, (d1_key,), ['d2\n'])
        # But heuristics could interfere, so check what happened:
        stored_kinds = []
        for record in vf.get_record_stream(
                [base_key, d1_key, d2_key], 'topological', False):
            stored_kinds.append(record.storage_kind)
        self.assertEqual(
            ['knit-ft-gz', 'knit-delta-gz', 'knit-delta-gz'], stored_kinds)
        # Generate a stream of just the deltas with
        # include_delta_closure=True, serialise to the network, and check
        # that we get a delta closure on the wire.
        wire_chunks = []
        for record in vf.get_record_stream(
                [d1_key, d2_key], 'topological', True):
            wire_chunks.append(record.get_bytes_as(record.storage_kind))
        # The first bytes should be a memo from _ContentMapGenerator, and
        # the second bytes should be empty (because it's an API proxy, not
        # something for wire serialisation).
        self.assertEqual('', wire_chunks[1])
        first_chunk = wire_chunks[0]
        kind, line_end = network_bytes_to_kind_and_offset(first_chunk)
        self.assertEqual('knit-delta-closure', kind)
 
2586
 
 
2587
 
 
2588
class TestContentMapGenerator(KnitTests):
 
2589
    """Tests for ContentMapGenerator"""
 
2590
 
 
2591
    def test_get_record_stream_gives_records(self):
 
2592
        vf = self.make_test_knit(name='test')
 
2593
        # put in three texts, giving ft, delta, delta
 
2594
        vf.add_lines(('base',), (), ['base\n', 'content\n'])
 
2595
        vf.add_lines(('d1',), (('base',),), ['d1\n'])
 
2596
        vf.add_lines(('d2',), (('d1',),), ['d2\n'])
 
2597
        keys = [('d1',), ('d2',)]
 
2598
        generator = _VFContentMapGenerator(vf, keys,
 
2599
            global_map=vf.get_parent_map(keys))
 
2600
        for record in generator.get_record_stream():
 
2601
            if record.key == ('d1',):
 
2602
                self.assertEqual('d1\n', record.get_bytes_as('fulltext'))
 
2603
            else:
 
2604
                self.assertEqual('d2\n', record.get_bytes_as('fulltext'))
 
2605
 
 
2606
    def test_get_record_stream_kinds_are_raw(self):
 
2607
        vf = self.make_test_knit(name='test')
 
2608
        # put in three texts, giving ft, delta, delta
 
2609
        vf.add_lines(('base',), (), ['base\n', 'content\n'])
 
2610
        vf.add_lines(('d1',), (('base',),), ['d1\n'])
 
2611
        vf.add_lines(('d2',), (('d1',),), ['d2\n'])
 
2612
        keys = [('base',), ('d1',), ('d2',)]
 
2613
        generator = _VFContentMapGenerator(vf, keys,
 
2614
            global_map=vf.get_parent_map(keys))
 
2615
        kinds = {('base',): 'knit-delta-closure',
 
2616
            ('d1',): 'knit-delta-closure-ref',
 
2617
            ('d2',): 'knit-delta-closure-ref',
 
2618
            }
 
2619
        for record in generator.get_record_stream():
 
2620
            self.assertEqual(kinds[record.key], record.storage_kind)