/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar


Viewing changes to bzrlib/repofmt/groupcompress_repo.py

  • Committer: Robert Collins
  • Date: 2009-08-04 04:36:34 UTC
  • mfrom: (4583 +trunk)
  • mto: This revision was merged to the branch mainline in revision 4593.
  • Revision ID: robertc@robertcollins.net-20090804043634-2iu9wpcgs273i97s
Merge bzr.dev.

--- bzrlib/repofmt/groupcompress_repo.py (before merge)
+++ bzrlib/repofmt/groupcompress_repo.py (after merge)
@@ -30,7 +30,6 @@
     osutils,
     pack,
     remote,
-    repository,
     revision as _mod_revision,
     trace,
     ui,
@@ -39,7 +38,6 @@
     BTreeGraphIndex,
     BTreeBuilder,
     )
-from bzrlib.index import GraphIndex, GraphIndexBuilder
 from bzrlib.groupcompress import (
     _GCGraphIndex,
     GroupCompressVersionedFiles,
@@ -48,6 +46,7 @@
     Pack,
     NewPack,
     KnitPackRepository,
+    KnitPackStreamSource,
     PackRootCommitBuilder,
     RepositoryPackCollection,
     RepositoryFormatPack,
@@ -217,6 +216,7 @@
             p_id_roots_set = set()
             stream = source_vf.get_record_stream(keys, 'groupcompress', True)
             for idx, record in enumerate(stream):
+                # Inventories should always be with revisions; assume success.
                 bytes = record.get_bytes_as('fulltext')
                 chk_inv = inventory.CHKInventory.deserialise(None, bytes,
                                                              record.key)
@@ -293,6 +293,11 @@
                     stream = source_vf.get_record_stream(cur_keys,
                                                          'as-requested', True)
                     for record in stream:
+                        if record.storage_kind == 'absent':
+                            # An absent CHK record: we assume that the missing
+                            # record is in a different pack - e.g. a page not
+                            # altered by the commit we're packing.
+                            continue
                         bytes = record.get_bytes_as('fulltext')
                         # We don't care about search_key_func for this code,
                         # because we only care about external references.
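
The guard added here matters because a pack operation may rewrite only some packs: a CHK page referenced by an inventory can legitimately live in a pack that is not part of the current operation, and the stream then reports it as 'absent' instead of erroring. A minimal sketch of the same consumer-side pattern (process() is a hypothetical callback, not bzrlib API):

    def copy_present_records(source_vf, keys, process):
        """Copy fulltexts for keys, skipping 'absent' records (sketch)."""
        stream = source_vf.get_record_stream(keys, 'as-requested', True)
        missing = []
        for record in stream:
            if record.storage_kind == 'absent':
                # Key not present in the packs being read; collect it
                # instead of aborting the whole copy.
                missing.append(record.key)
                continue
            process(record.get_bytes_as('fulltext'))
        return missing
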
@@ -438,7 +443,7 @@
         #      is grabbing too many keys...
         text_keys = source_vf.keys()
         self._copy_stream(source_vf, target_vf, text_keys,
-                          'text', self._get_progress_stream, 4)
+                          'texts', self._get_progress_stream, 4)
 
     def _copy_signature_texts(self):
         source_vf, target_vf = self._build_vfs('signature', False, False)
@@ -557,11 +562,6 @@
     pack_factory = GCPack
     resumed_pack_factory = ResumedGCPack
 
-    def _already_packed(self):
-        """Is the collection already packed?"""
-        # Always repack GC repositories for now
-        return False
-
     def _execute_pack_operations(self, pack_operations,
                                  _packer_class=GCCHKPacker,
                                  reload_func=None):
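
Dropping this override restores the inherited RepositoryPackCollection behaviour, so `bzr pack` no longer rewrites a GroupCompress repository unconditionally. For reference, the deleted lines amounted to the following override; the enclosing class name is assumed from context, since only the method body appears in the hunk:

    class GCRepositoryPackCollection(RepositoryPackCollection):
        # (assumed class name; the override below is the deleted code)

        def _already_packed(self):
            """Is the collection already packed?"""
            # Always repack GC repositories for now
            return False  # i.e. never report 'already packed'
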
@@ -620,7 +620,8 @@
         self.inventories = GroupCompressVersionedFiles(
             _GCGraphIndex(self._pack_collection.inventory_index.combined_index,
                 add_callback=self._pack_collection.inventory_index.add_callback,
-                parents=True, is_locked=self.is_locked),
+                parents=True, is_locked=self.is_locked,
+                inconsistency_fatal=False),
             access=self._pack_collection.inventory_index.data_access)
         self.revisions = GroupCompressVersionedFiles(
             _GCGraphIndex(self._pack_collection.revision_index.combined_index,
@@ -632,20 +633,26 @@
         self.signatures = GroupCompressVersionedFiles(
             _GCGraphIndex(self._pack_collection.signature_index.combined_index,
                 add_callback=self._pack_collection.signature_index.add_callback,
-                parents=False, is_locked=self.is_locked),
+                parents=False, is_locked=self.is_locked,
+                inconsistency_fatal=False),
             access=self._pack_collection.signature_index.data_access,
             delta=False)
         self.texts = GroupCompressVersionedFiles(
             _GCGraphIndex(self._pack_collection.text_index.combined_index,
                 add_callback=self._pack_collection.text_index.add_callback,
-                parents=True, is_locked=self.is_locked),
+                parents=True, is_locked=self.is_locked,
+                inconsistency_fatal=False),
             access=self._pack_collection.text_index.data_access)
         # No parents, individual CHK pages don't have specific ancestry
         self.chk_bytes = GroupCompressVersionedFiles(
             _GCGraphIndex(self._pack_collection.chk_index.combined_index,
                 add_callback=self._pack_collection.chk_index.add_callback,
-                parents=False, is_locked=self.is_locked),
+                parents=False, is_locked=self.is_locked,
+                inconsistency_fatal=False),
             access=self._pack_collection.chk_index.data_access)
+        search_key_name = self._format._serializer.search_key_name
+        search_key_func = chk_map.search_key_registry.get(search_key_name)
+        self.chk_bytes._search_key_func = search_key_func
         # True when the repository object is 'write locked' (as opposed to the
         # physical lock only taken out around changes to the pack-names list.)
         # Another way to represent this would be a decorator around the control
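
Besides relaxing `inconsistency_fatal` on the indices, the tail of this hunk wires the serializer's named search-key scheme onto `chk_bytes`, so CHKMap pages are spread using the scheme the format declares. A small sketch of that registry lookup, assuming a bzrlib tree is importable and that 'hash-16-way' is among the registered names (treat the exact set of names as an assumption of this sketch):

    from bzrlib import chk_map

    # The serializer's search_key_name attribute picks one of the
    # registered schemes at repository-open time.
    search_key_func = chk_map.search_key_registry.get('hash-16-way')
    # CHK keys are 1-tuples; the function maps a key to the short
    # search key that decides which CHKMap page the entry lands on.
    print search_key_func(('file-id-1234',))
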
@@ -674,6 +681,42 @@
         return self._inventory_add_lines(revision_id, parents,
             inv_lines, check_content=False)
 
+    def _create_inv_from_null(self, delta, revision_id):
+        """This will mutate new_inv directly.
+
+        This is a simplified form of create_by_apply_delta which knows that all
+        the old values must be None, so everything is a create.
+        """
+        serializer = self._format._serializer
+        new_inv = inventory.CHKInventory(serializer.search_key_name)
+        new_inv.revision_id = revision_id
+        entry_to_bytes = new_inv._entry_to_bytes
+        id_to_entry_dict = {}
+        parent_id_basename_dict = {}
+        for old_path, new_path, file_id, entry in delta:
+            if old_path is not None:
+                raise ValueError('Invalid delta, somebody tried to delete %r'
+                                 ' from the NULL_REVISION'
+                                 % ((old_path, file_id),))
+            if new_path is None:
+                raise ValueError('Invalid delta, delta from NULL_REVISION has'
+                                 ' no new_path %r' % (file_id,))
+            if new_path == '':
+                new_inv.root_id = file_id
+                parent_id_basename_key = ('', '')
+            else:
+                utf8_entry_name = entry.name.encode('utf-8')
+                parent_id_basename_key = (entry.parent_id, utf8_entry_name)
+            new_value = entry_to_bytes(entry)
+            # Populate Caches?
+            # new_inv._path_to_fileid_cache[new_path] = file_id
+            id_to_entry_dict[(file_id,)] = new_value
+            parent_id_basename_dict[parent_id_basename_key] = file_id
+
+        new_inv._populate_from_dicts(self.chk_bytes, id_to_entry_dict,
+            parent_id_basename_dict, maximum_size=serializer.maximum_size)
+        return new_inv
+
     def add_inventory_by_delta(self, basis_revision_id, delta, new_revision_id,
                                parents, basis_inv=None, propagate_caches=False):
         """Add a new inventory expressed as a delta against another revision.
@@ -699,24 +742,29 @@
             repository format specific) of the serialized inventory, and the
             resulting inventory.
         """
-        if basis_revision_id == _mod_revision.NULL_REVISION:
-            return KnitPackRepository.add_inventory_by_delta(self,
-                basis_revision_id, delta, new_revision_id, parents)
         if not self.is_in_write_group():
             raise AssertionError("%r not in write group" % (self,))
         _mod_revision.check_not_reserved_id(new_revision_id)
-        basis_tree = self.revision_tree(basis_revision_id)
-        basis_tree.lock_read()
-        try:
-            if basis_inv is None:
+        basis_tree = None
+        if basis_inv is None:
+            if basis_revision_id == _mod_revision.NULL_REVISION:
+                new_inv = self._create_inv_from_null(delta, new_revision_id)
+                inv_lines = new_inv.to_lines()
+                return self._inventory_add_lines(new_revision_id, parents,
+                    inv_lines, check_content=False), new_inv
+            else:
+                basis_tree = self.revision_tree(basis_revision_id)
+                basis_tree.lock_read()
                 basis_inv = basis_tree.inventory
+        try:
             result = basis_inv.create_by_apply_delta(delta, new_revision_id,
                 propagate_caches=propagate_caches)
             inv_lines = result.to_lines()
             return self._inventory_add_lines(new_revision_id, parents,
                 inv_lines, check_content=False), result
         finally:
-            basis_tree.unlock()
+            if basis_tree is not None:
+                basis_tree.unlock()
 
     def deserialise_inventory(self, revision_id, bytes):
         return inventory.CHKInventory.deserialise(self.chk_bytes, bytes,
@@ -740,21 +788,10 @@
         # make it raise to trap naughty direct users.
         raise NotImplementedError(self._iter_inventory_xmls)
 
-    def _find_parent_ids_of_revisions(self, revision_ids):
-        # TODO: we probably want to make this a helper that other code can get
-        #       at
-        parent_map = self.get_parent_map(revision_ids)
-        parents = set()
-        map(parents.update, parent_map.itervalues())
-        parents.difference_update(revision_ids)
-        parents.discard(_mod_revision.NULL_REVISION)
-        return parents
-
-    def _find_present_inventory_ids(self, revision_ids):
-        keys = [(r,) for r in revision_ids]
-        parent_map = self.inventories.get_parent_map(keys)
-        present_inventory_ids = set(k[-1] for k in parent_map)
-        return present_inventory_ids
+    def _find_present_inventory_keys(self, revision_keys):
+        parent_map = self.inventories.get_parent_map(revision_keys)
+        present_inventory_keys = set(k for k in parent_map)
+        return present_inventory_keys
 
     def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
         """Find the file ids and versions affected by revisions.
@@ -771,12 +808,20 @@
         file_id_revisions = {}
         pb = ui.ui_factory.nested_progress_bar()
         try:
-            parent_ids = self._find_parent_ids_of_revisions(revision_ids)
-            present_parent_inv_ids = self._find_present_inventory_ids(parent_ids)
+            revision_keys = [(r,) for r in revision_ids]
+            parent_keys = self._find_parent_keys_of_revisions(revision_keys)
+            # TODO: instead of using _find_present_inventory_keys, change the
+            #       code paths to allow missing inventories to be tolerated.
+            #       However, we only want to tolerate missing parent
+            #       inventories, not missing inventories for revision_ids
+            present_parent_inv_keys = self._find_present_inventory_keys(
+                                        parent_keys)
+            present_parent_inv_ids = set(
+                [k[-1] for k in present_parent_inv_keys])
             uninteresting_root_keys = set()
             interesting_root_keys = set()
-            inventories_to_read = set(present_parent_inv_ids)
-            inventories_to_read.update(revision_ids)
+            inventories_to_read = set(revision_ids)
+            inventories_to_read.update(present_parent_inv_ids)
             for inv in self.iter_inventories(inventories_to_read):
                 entry_chk_root_key = inv.id_to_entry.key()
                 if inv.revision_id in present_parent_inv_ids:
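
`fileids_altered_by_revision_ids` now leans on `_find_parent_keys_of_revisions`, the key-space successor of the `_find_parent_ids_of_revisions` helper deleted in the previous hunk; its body is not shown in this diff. A sketch of the equivalent logic, transliterated from the removed id-based version (treat the details as an assumption):

    def find_parent_keys_of_revisions(revision_keys, parent_map):
        """Keys of parents of revision_keys outside the set itself.

        parent_map stands in for self.revisions.get_parent_map(
        revision_keys); NULL_REVISION appears in key space as ('null:',).
        """
        parent_keys = set()
        for parents in parent_map.itervalues():
            parent_keys.update(parents)
        parent_keys.difference_update(revision_keys)
        parent_keys.discard(('null:',))
        return parent_keys
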
@@ -850,7 +895,7 @@
         return super(CHKInventoryRepository, self)._get_source(to_format)
 
 
-class GroupCHKStreamSource(repository.StreamSource):
+class GroupCHKStreamSource(KnitPackStreamSource):
     """Used when both the source and target repo are GroupCHK repos."""
 
     def __init__(self, from_repository, to_format):
@@ -858,6 +903,7 @@
         super(GroupCHKStreamSource, self).__init__(from_repository, to_format)
         self._revision_keys = None
         self._text_keys = None
+        self._text_fetch_order = 'groupcompress'
         self._chk_id_roots = None
         self._chk_p_id_roots = None
 
@@ -902,16 +948,10 @@
             p_id_roots_set.clear()
         return ('inventories', _filtered_inv_stream())
 
-    def _find_present_inventories(self, revision_ids):
-        revision_keys = [(r,) for r in revision_ids]
-        inventories = self.from_repository.inventories
-        present_inventories = inventories.get_parent_map(revision_keys)
-        return [p[-1] for p in present_inventories]
-
-    def _get_filtered_chk_streams(self, excluded_revision_ids):
+    def _get_filtered_chk_streams(self, excluded_revision_keys):
         self._text_keys = set()
-        excluded_revision_ids.discard(_mod_revision.NULL_REVISION)
-        if not excluded_revision_ids:
+        excluded_revision_keys.discard(_mod_revision.NULL_REVISION)
+        if not excluded_revision_keys:
             uninteresting_root_keys = set()
             uninteresting_pid_root_keys = set()
         else:
@@ -919,9 +959,9 @@
             # actually present
             # TODO: Update Repository.iter_inventories() to add
             #       ignore_missing=True
-            present_ids = self.from_repository._find_present_inventory_ids(
-                            excluded_revision_ids)
-            present_ids = self._find_present_inventories(excluded_revision_ids)
+            present_keys = self.from_repository._find_present_inventory_keys(
+                            excluded_revision_keys)
+            present_ids = [k[-1] for k in present_keys]
             uninteresting_root_keys = set()
             uninteresting_pid_root_keys = set()
             for inv in self.from_repository.iter_inventories(present_ids):
@@ -952,14 +992,6 @@
             self._chk_p_id_roots = None
         yield 'chk_bytes', _get_parent_id_basename_to_file_id_pages()
 
-    def _get_text_stream(self):
-        # Note: We know we don't have to handle adding root keys, because both
-        # the source and target are GCCHK, and those always support rich-roots
-        # We may want to request as 'unordered', in case the source has done a
-        # 'split' packing
-        return ('texts', self.from_repository.texts.get_record_stream(
-                            self._text_keys, 'groupcompress', False))
-
     def get_stream(self, search):
         revision_ids = search.get_keys()
         for stream_info in self._fetch_revision_texts(revision_ids):
@@ -970,8 +1002,9 @@
         # For now, exclude all parents that are at the edge of ancestry, for
         # which we have inventories
         from_repo = self.from_repository
-        parent_ids = from_repo._find_parent_ids_of_revisions(revision_ids)
-        for stream_info in self._get_filtered_chk_streams(parent_ids):
+        parent_keys = from_repo._find_parent_keys_of_revisions(
+                        self._revision_keys)
+        for stream_info in self._get_filtered_chk_streams(parent_keys):
             yield stream_info
         yield self._get_text_stream()
 
@@ -995,8 +1028,8 @@
         # no unavailable texts when the ghost inventories are not filled in.
         yield self._get_inventory_stream(missing_inventory_keys,
                                          allow_absent=True)
-        # We use the empty set for excluded_revision_ids, to make it clear that
-        # we want to transmit all referenced chk pages.
+        # We use the empty set for excluded_revision_keys, to make it clear
+        # that we want to transmit all referenced chk pages.
         for stream_info in self._get_filtered_chk_streams(set()):
             yield stream_info
 
@@ -1025,6 +1058,7 @@
     _fetch_order = 'unordered'
     _fetch_uses_deltas = False # essentially ignored by the groupcompress code.
     fast_deltas = True
+    pack_compresses = True
 
     def _get_matching_bzrdir(self):
         return bzrdir.format_registry.make_bzrdir('development6-rich-root')
@@ -1048,7 +1082,8 @@
         if not target_format.rich_root_data:
             raise errors.BadConversionTarget(
                 'Does not support rich root data.', target_format)
-        if not getattr(target_format, 'supports_tree_reference', False):
+        if (self.supports_tree_reference and
+            not getattr(target_format, 'supports_tree_reference', False)):
             raise errors.BadConversionTarget(
                 'Does not support nested trees', target_format)
 