217
216
p_id_roots_set = set()
218
217
stream = source_vf.get_record_stream(keys, 'groupcompress', True)
219
218
for idx, record in enumerate(stream):
219
# Inventories should always be with revisions; assume success.
220
220
bytes = record.get_bytes_as('fulltext')
221
221
chk_inv = inventory.CHKInventory.deserialise(None, bytes,
293
293
stream = source_vf.get_record_stream(cur_keys,
294
294
'as-requested', True)
295
295
for record in stream:
296
if record.storage_kind == 'absent':
297
# An absent CHK record: we assume that the missing
298
# record is in a different pack - e.g. a page not
299
# altered by the commit we're packing.
296
301
bytes = record.get_bytes_as('fulltext')
297
302
# We don't care about search_key_func for this code,
298
303
# because we only care about external references.
438
443
# is grabbing too many keys...
439
444
text_keys = source_vf.keys()
440
445
self._copy_stream(source_vf, target_vf, text_keys,
441
'text', self._get_progress_stream, 4)
446
'texts', self._get_progress_stream, 4)
443
448
def _copy_signature_texts(self):
444
449
source_vf, target_vf = self._build_vfs('signature', False, False)
557
562
pack_factory = GCPack
558
563
resumed_pack_factory = ResumedGCPack
560
def _already_packed(self):
561
"""Is the collection already packed?"""
562
# Always repack GC repositories for now
565
565
def _execute_pack_operations(self, pack_operations,
566
566
_packer_class=GCCHKPacker,
567
567
reload_func=None):
620
620
self.inventories = GroupCompressVersionedFiles(
621
621
_GCGraphIndex(self._pack_collection.inventory_index.combined_index,
622
622
add_callback=self._pack_collection.inventory_index.add_callback,
623
parents=True, is_locked=self.is_locked),
623
parents=True, is_locked=self.is_locked,
624
inconsistency_fatal=False),
624
625
access=self._pack_collection.inventory_index.data_access)
625
626
self.revisions = GroupCompressVersionedFiles(
626
627
_GCGraphIndex(self._pack_collection.revision_index.combined_index,
632
633
self.signatures = GroupCompressVersionedFiles(
633
634
_GCGraphIndex(self._pack_collection.signature_index.combined_index,
634
635
add_callback=self._pack_collection.signature_index.add_callback,
635
parents=False, is_locked=self.is_locked),
636
parents=False, is_locked=self.is_locked,
637
inconsistency_fatal=False),
636
638
access=self._pack_collection.signature_index.data_access,
638
640
self.texts = GroupCompressVersionedFiles(
639
641
_GCGraphIndex(self._pack_collection.text_index.combined_index,
640
642
add_callback=self._pack_collection.text_index.add_callback,
641
parents=True, is_locked=self.is_locked),
643
parents=True, is_locked=self.is_locked,
644
inconsistency_fatal=False),
642
645
access=self._pack_collection.text_index.data_access)
643
646
# No parents, individual CHK pages don't have specific ancestry
644
647
self.chk_bytes = GroupCompressVersionedFiles(
645
648
_GCGraphIndex(self._pack_collection.chk_index.combined_index,
646
649
add_callback=self._pack_collection.chk_index.add_callback,
647
parents=False, is_locked=self.is_locked),
650
parents=False, is_locked=self.is_locked,
651
inconsistency_fatal=False),
648
652
access=self._pack_collection.chk_index.data_access)
653
search_key_name = self._format._serializer.search_key_name
654
search_key_func = chk_map.search_key_registry.get(search_key_name)
655
self.chk_bytes._search_key_func = search_key_func
649
656
# True when the repository object is 'write locked' (as opposed to the
650
657
# physical lock only taken out around changes to the pack-names list.)
651
658
# Another way to represent this would be a decorator around the control
674
681
return self._inventory_add_lines(revision_id, parents,
675
682
inv_lines, check_content=False)
684
def _create_inv_from_null(self, delta, revision_id):
685
"""This will mutate new_inv directly.
687
This is a simplified form of create_by_apply_delta which knows that all
688
the old values must be None, so everything is a create.
690
serializer = self._format._serializer
691
new_inv = inventory.CHKInventory(serializer.search_key_name)
692
new_inv.revision_id = revision_id
693
entry_to_bytes = new_inv._entry_to_bytes
694
id_to_entry_dict = {}
695
parent_id_basename_dict = {}
696
for old_path, new_path, file_id, entry in delta:
697
if old_path is not None:
698
raise ValueError('Invalid delta, somebody tried to delete %r'
699
' from the NULL_REVISION'
700
% ((old_path, file_id),))
702
raise ValueError('Invalid delta, delta from NULL_REVISION has'
703
' no new_path %r' % (file_id,))
705
new_inv.root_id = file_id
706
parent_id_basename_key = ('', '')
708
utf8_entry_name = entry.name.encode('utf-8')
709
parent_id_basename_key = (entry.parent_id, utf8_entry_name)
710
new_value = entry_to_bytes(entry)
712
# new_inv._path_to_fileid_cache[new_path] = file_id
713
id_to_entry_dict[(file_id,)] = new_value
714
parent_id_basename_dict[parent_id_basename_key] = file_id
716
new_inv._populate_from_dicts(self.chk_bytes, id_to_entry_dict,
717
parent_id_basename_dict, maximum_size=serializer.maximum_size)
677
720
def add_inventory_by_delta(self, basis_revision_id, delta, new_revision_id,
678
721
parents, basis_inv=None, propagate_caches=False):
679
722
"""Add a new inventory expressed as a delta against another revision.
699
742
repository format specific) of the serialized inventory, and the
700
743
resulting inventory.
702
if basis_revision_id == _mod_revision.NULL_REVISION:
703
return KnitPackRepository.add_inventory_by_delta(self,
704
basis_revision_id, delta, new_revision_id, parents)
705
745
if not self.is_in_write_group():
706
746
raise AssertionError("%r not in write group" % (self,))
707
747
_mod_revision.check_not_reserved_id(new_revision_id)
708
basis_tree = self.revision_tree(basis_revision_id)
709
basis_tree.lock_read()
711
if basis_inv is None:
749
if basis_inv is None:
750
if basis_revision_id == _mod_revision.NULL_REVISION:
751
new_inv = self._create_inv_from_null(delta, new_revision_id)
752
inv_lines = new_inv.to_lines()
753
return self._inventory_add_lines(new_revision_id, parents,
754
inv_lines, check_content=False), new_inv
756
basis_tree = self.revision_tree(basis_revision_id)
757
basis_tree.lock_read()
712
758
basis_inv = basis_tree.inventory
713
760
result = basis_inv.create_by_apply_delta(delta, new_revision_id,
714
761
propagate_caches=propagate_caches)
715
762
inv_lines = result.to_lines()
716
763
return self._inventory_add_lines(new_revision_id, parents,
717
764
inv_lines, check_content=False), result
766
if basis_tree is not None:
721
769
def deserialise_inventory(self, revision_id, bytes):
722
770
return inventory.CHKInventory.deserialise(self.chk_bytes, bytes,
740
788
# make it raise to trap naughty direct users.
741
789
raise NotImplementedError(self._iter_inventory_xmls)
743
def _find_parent_ids_of_revisions(self, revision_ids):
744
# TODO: we probably want to make this a helper that other code can get
746
parent_map = self.get_parent_map(revision_ids)
748
map(parents.update, parent_map.itervalues())
749
parents.difference_update(revision_ids)
750
parents.discard(_mod_revision.NULL_REVISION)
753
def _find_present_inventory_ids(self, revision_ids):
    """Return the revision ids whose inventory key shows up in the parent map.

    Each revision id is wrapped in a 1-tuple key and handed to
    ``self.inventories.get_parent_map``; the last element of every key in
    the mapping that comes back is collected.

    :param revision_ids: Iterable of revision ids to look up.
    :return: A set holding the last element of each key found in the
        parent map.
    """
    # NOTE(review): presumably get_parent_map omits keys that are absent,
    # which is what makes the result "present" ids -- confirm against the
    # versioned-files implementation.
    keys = [(r,) for r in revision_ids]
    parent_map = self.inventories.get_parent_map(keys)
    return set(k[-1] for k in parent_map)
791
def _find_present_inventory_keys(self, revision_keys):
    """Return the keys from revision_keys found in the inventories parent map.

    :param revision_keys: Keys passed straight through to
        ``self.inventories.get_parent_map``.
    :return: A set of the keys of the mapping returned by that call.
    """
    # NOTE(review): presumably get_parent_map leaves out keys that are not
    # present, so the result is the "present" subset -- confirm.
    parent_map = self.inventories.get_parent_map(revision_keys)
    # Iterating the map yields its keys, so set() over it is sufficient.
    return set(parent_map)
759
796
def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
760
797
"""Find the file ids and versions affected by revisions.
771
808
file_id_revisions = {}
772
809
pb = ui.ui_factory.nested_progress_bar()
774
parent_ids = self._find_parent_ids_of_revisions(revision_ids)
775
present_parent_inv_ids = self._find_present_inventory_ids(parent_ids)
811
revision_keys = [(r,) for r in revision_ids]
812
parent_keys = self._find_parent_keys_of_revisions(revision_keys)
813
# TODO: instead of using _find_present_inventory_keys, change the
814
# code paths to allow missing inventories to be tolerated.
815
# However, we only want to tolerate missing parent
816
# inventories, not missing inventories for revision_ids
817
present_parent_inv_keys = self._find_present_inventory_keys(
819
present_parent_inv_ids = set(
820
[k[-1] for k in present_parent_inv_keys])
776
821
uninteresting_root_keys = set()
777
822
interesting_root_keys = set()
778
inventories_to_read = set(present_parent_inv_ids)
779
inventories_to_read.update(revision_ids)
823
inventories_to_read = set(revision_ids)
824
inventories_to_read.update(present_parent_inv_ids)
780
825
for inv in self.iter_inventories(inventories_to_read):
781
826
entry_chk_root_key = inv.id_to_entry.key()
782
827
if inv.revision_id in present_parent_inv_ids:
850
895
return super(CHKInventoryRepository, self)._get_source(to_format)
853
class GroupCHKStreamSource(repository.StreamSource):
898
class GroupCHKStreamSource(KnitPackStreamSource):
854
899
"""Used when both the source and target repo are GroupCHK repos."""
856
901
def __init__(self, from_repository, to_format):
858
903
super(GroupCHKStreamSource, self).__init__(from_repository, to_format)
859
904
self._revision_keys = None
860
905
self._text_keys = None
906
self._text_fetch_order = 'groupcompress'
861
907
self._chk_id_roots = None
862
908
self._chk_p_id_roots = None
902
948
p_id_roots_set.clear()
903
949
return ('inventories', _filtered_inv_stream())
905
def _find_present_inventories(self, revision_ids):
    """Return revision ids whose key appears in the source's inventories map.

    The ids are turned into 1-tuple keys and looked up with
    ``self.from_repository.inventories.get_parent_map``.

    :param revision_ids: Iterable of revision ids to look up.
    :return: A list with the last element of each key in the returned
        mapping, in that mapping's iteration order.
    """
    # NOTE(review): presumably get_parent_map only reports keys that are
    # actually present -- confirm against the versioned-files API.
    revision_keys = [(r,) for r in revision_ids]
    inventories = self.from_repository.inventories
    present_inventories = inventories.get_parent_map(revision_keys)
    return [p[-1] for p in present_inventories]
911
def _get_filtered_chk_streams(self, excluded_revision_ids):
951
def _get_filtered_chk_streams(self, excluded_revision_keys):
912
952
self._text_keys = set()
913
excluded_revision_ids.discard(_mod_revision.NULL_REVISION)
914
if not excluded_revision_ids:
953
excluded_revision_keys.discard(_mod_revision.NULL_REVISION)
954
if not excluded_revision_keys:
915
955
uninteresting_root_keys = set()
916
956
uninteresting_pid_root_keys = set()
919
959
# actually present
920
960
# TODO: Update Repository.iter_inventories() to add
921
961
# ignore_missing=True
922
present_ids = self.from_repository._find_present_inventory_ids(
923
excluded_revision_ids)
924
present_ids = self._find_present_inventories(excluded_revision_ids)
962
present_keys = self.from_repository._find_present_inventory_keys(
963
excluded_revision_keys)
964
present_ids = [k[-1] for k in present_keys]
925
965
uninteresting_root_keys = set()
926
966
uninteresting_pid_root_keys = set()
927
967
for inv in self.from_repository.iter_inventories(present_ids):
952
992
self._chk_p_id_roots = None
953
993
yield 'chk_bytes', _get_parent_id_basename_to_file_id_pages()
955
def _get_text_stream(self):
    """Return the ``('texts', stream)`` pair for the collected text keys.

    The stream is obtained from ``self.from_repository.texts`` for
    ``self._text_keys``, requested in 'groupcompress' ordering with the
    final argument set to False.

    :return: A 2-tuple of the literal ``'texts'`` and the record stream.
    """
    # Note: We know we don't have to handle adding root keys, because both
    # the source and target are GCCHK, and those always support rich-roots.
    # NOTE(review): the original comment also floated requesting
    # 'unordered' instead; its rationale is cut off in this view, so the
    # ordering is left as 'groupcompress'.
    text_stream = self.from_repository.texts.get_record_stream(
        self._text_keys, 'groupcompress', False)
    return ('texts', text_stream)
963
995
def get_stream(self, search):
964
996
revision_ids = search.get_keys()
965
997
for stream_info in self._fetch_revision_texts(revision_ids):
970
1002
# For now, exclude all parents that are at the edge of ancestry, for
971
1003
# which we have inventories
972
1004
from_repo = self.from_repository
973
parent_ids = from_repo._find_parent_ids_of_revisions(revision_ids)
974
for stream_info in self._get_filtered_chk_streams(parent_ids):
1005
parent_keys = from_repo._find_parent_keys_of_revisions(
1006
self._revision_keys)
1007
for stream_info in self._get_filtered_chk_streams(parent_keys):
975
1008
yield stream_info
976
1009
yield self._get_text_stream()
995
1028
# no unavailable texts when the ghost inventories are not filled in.
996
1029
yield self._get_inventory_stream(missing_inventory_keys,
997
1030
allow_absent=True)
998
# We use the empty set for excluded_revision_ids, to make it clear that
999
# we want to transmit all referenced chk pages.
1031
# We use the empty set for excluded_revision_keys, to make it clear
1032
# that we want to transmit all referenced chk pages.
1000
1033
for stream_info in self._get_filtered_chk_streams(set()):
1001
1034
yield stream_info
1048
1082
if not target_format.rich_root_data:
1049
1083
raise errors.BadConversionTarget(
1050
1084
'Does not support rich root data.', target_format)
1051
if not getattr(target_format, 'supports_tree_reference', False):
1085
if (self.supports_tree_reference and
1086
not getattr(target_format, 'supports_tree_reference', False)):
1052
1087
raise errors.BadConversionTarget(
1053
1088
'Does not support nested trees', target_format)