87
88
Pack.__init__(self,
88
# Revisions: parents list, no text compression.
89
index_builder_class(reference_lists=1),
90
# Inventory: We want to map compression only, but currently the
91
# knit code hasn't been updated enough to understand that, so we
92
# have a regular 2-list index giving parents and compression
94
index_builder_class(reference_lists=1),
95
# Texts: per file graph, for all fileids - so one reference list
96
# and two elements in the key tuple.
97
index_builder_class(reference_lists=1, key_elements=2),
98
# Signatures: Just blobs to store, no compression, no parents
100
index_builder_class(reference_lists=0),
101
# CHK based storage - just blobs, no compression or parents.
89
# Revisions: parents list, no text compression.
90
index_builder_class(reference_lists=1),
91
# Inventory: We want to map compression only, but currently the
92
# knit code hasn't been updated enough to understand that, so we
93
# have a regular 2-list index giving parents and compression
95
index_builder_class(reference_lists=1),
96
# Texts: per file graph, for all fileids - so one reference list
97
# and two elements in the key tuple.
98
index_builder_class(reference_lists=1, key_elements=2),
99
# Signatures: Just blobs to store, no compression, no parents
101
index_builder_class(reference_lists=0),
102
# CHK based storage - just blobs, no compression or parents.
104
105
self._pack_collection = pack_collection
105
106
# When we make readonly indices, we need this.
106
107
self.index_class = pack_collection._index_class
625
627
# First, copy the existing CHKs on the assumption that most of them
626
628
# will be correct. This will save us from having to reinsert (and
627
629
# recompress) these records later at the cost of perhaps preserving a
629
631
# (Iterate but don't insert _get_filtered_inv_stream to populate the
630
632
# variables needed by GCCHKPacker._copy_chk_texts.)
631
633
self._exhaust_stream(source_vf, inventory_keys, 'inventories',
632
self._get_filtered_inv_stream, 2)
634
self._get_filtered_inv_stream, 2)
633
635
GCCHKPacker._copy_chk_texts(self)
634
636
# Now copy and fix the inventories, and any regenerated CHKs.
636
637
def chk_canonicalizing_inv_stream(source_vf, keys, message, pb=None):
637
638
return self._get_filtered_canonicalizing_inv_stream(
638
639
source_vf, keys, message, pb, source_chk_vf, target_chk_vf)
646
647
def _get_filtered_canonicalizing_inv_stream(self, source_vf, keys, message,
647
pb=None, source_chk_vf=None, target_chk_vf=None):
648
pb=None, source_chk_vf=None, target_chk_vf=None):
648
649
"""Filter the texts of inventories, regenerating CHKs to make sure they
651
652
total_keys = len(keys)
652
653
target_chk_vf = versionedfile.NoDupeAddLinesDecorator(target_chk_vf)
654
654
def _filtered_inv_stream():
655
655
stream = source_vf.get_record_stream(keys, 'groupcompress', True)
656
656
search_key_name = None
657
657
for idx, record in enumerate(stream):
658
658
# Inventories should always be with revisions; assume success.
659
lines = record.get_bytes_as('lines')
659
bytes = record.get_bytes_as('fulltext')
660
660
chk_inv = inventory.CHKInventory.deserialise(
661
source_chk_vf, lines, record.key)
661
source_chk_vf, bytes, record.key)
662
662
if pb is not None:
663
663
pb.update('inv', idx, total_keys)
664
664
chk_inv.id_to_entry._ensure_root()
665
665
if search_key_name is None:
666
666
# Find the name corresponding to the search_key_func
667
667
search_key_reg = chk_map.search_key_registry
668
for search_key_name, func in search_key_reg.items():
668
for search_key_name, func in search_key_reg.iteritems():
669
669
if func == chk_inv.id_to_entry._search_key_func:
671
671
canonical_inv = inventory.CHKInventory.from_inventory(
789
788
for interesting_rec, interesting_map in chk_diff:
791
except errors.NoSuchRevision as e:
790
except errors.NoSuchRevision, e:
793
792
"missing chk node(s) for parent_id_basename_to_file_id maps")
794
793
present_text_keys = no_fallback_texts_index.get_parent_map(text_keys)
795
794
missing_text_keys = text_keys.difference(present_text_keys)
796
795
if missing_text_keys:
797
796
problems.append("missing text keys: %r"
798
% (sorted(missing_text_keys),))
797
% (sorted(missing_text_keys),))
802
801
class CHKInventoryRepository(PackRepository):
803
802
"""subclass of PackRepository that uses CHK based inventories."""
805
def __init__(self, _format, a_controldir, control_files, _commit_builder_class,
804
def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
807
806
"""Overridden to change pack collection class."""
808
super(CHKInventoryRepository, self).__init__(_format, a_controldir,
809
control_files, _commit_builder_class, _serializer)
807
super(CHKInventoryRepository, self).__init__(_format, a_bzrdir,
808
control_files, _commit_builder_class, _serializer)
810
809
index_transport = self._transport.clone('indices')
811
810
self._pack_collection = GCRepositoryPackCollection(self,
812
self._transport, index_transport,
813
self._transport.clone(
815
self._transport.clone(
817
_format.index_builder_class,
819
use_chk_index=self._format.supports_chks,
811
self._transport, index_transport,
812
self._transport.clone('upload'),
813
self._transport.clone('packs'),
814
_format.index_builder_class,
816
use_chk_index=self._format.supports_chks,
821
818
self.inventories = GroupCompressVersionedFiles(
822
819
_GCGraphIndex(self._pack_collection.inventory_index.combined_index,
823
add_callback=self._pack_collection.inventory_index.add_callback,
824
parents=True, is_locked=self.is_locked,
825
inconsistency_fatal=False),
820
add_callback=self._pack_collection.inventory_index.add_callback,
821
parents=True, is_locked=self.is_locked,
822
inconsistency_fatal=False),
826
823
access=self._pack_collection.inventory_index.data_access)
827
824
self.revisions = GroupCompressVersionedFiles(
828
825
_GCGraphIndex(self._pack_collection.revision_index.combined_index,
829
add_callback=self._pack_collection.revision_index.add_callback,
830
parents=True, is_locked=self.is_locked,
831
track_external_parent_refs=True, track_new_keys=True),
826
add_callback=self._pack_collection.revision_index.add_callback,
827
parents=True, is_locked=self.is_locked,
828
track_external_parent_refs=True, track_new_keys=True),
832
829
access=self._pack_collection.revision_index.data_access,
834
831
self.signatures = GroupCompressVersionedFiles(
835
832
_GCGraphIndex(self._pack_collection.signature_index.combined_index,
836
add_callback=self._pack_collection.signature_index.add_callback,
837
parents=False, is_locked=self.is_locked,
838
inconsistency_fatal=False),
833
add_callback=self._pack_collection.signature_index.add_callback,
834
parents=False, is_locked=self.is_locked,
835
inconsistency_fatal=False),
839
836
access=self._pack_collection.signature_index.data_access,
841
838
self.texts = GroupCompressVersionedFiles(
842
839
_GCGraphIndex(self._pack_collection.text_index.combined_index,
843
add_callback=self._pack_collection.text_index.add_callback,
844
parents=True, is_locked=self.is_locked,
845
inconsistency_fatal=False),
840
add_callback=self._pack_collection.text_index.add_callback,
841
parents=True, is_locked=self.is_locked,
842
inconsistency_fatal=False),
846
843
access=self._pack_collection.text_index.data_access)
847
844
# No parents, individual CHK pages don't have specific ancestry
848
845
self.chk_bytes = GroupCompressVersionedFiles(
849
846
_GCGraphIndex(self._pack_collection.chk_index.combined_index,
850
add_callback=self._pack_collection.chk_index.add_callback,
851
parents=False, is_locked=self.is_locked,
852
inconsistency_fatal=False),
847
add_callback=self._pack_collection.chk_index.add_callback,
848
parents=False, is_locked=self.is_locked,
849
inconsistency_fatal=False),
853
850
access=self._pack_collection.chk_index.data_access)
854
851
search_key_name = self._format._serializer.search_key_name
855
852
search_key_func = chk_map.search_key_registry.get(search_key_name)
949
946
raise AssertionError("%r not in write group" % (self,))
950
947
_mod_revision.check_not_reserved_id(new_revision_id)
951
948
basis_tree = None
952
if basis_inv is None or not isinstance(basis_inv, inventory.CHKInventory):
949
if basis_inv is None:
953
950
if basis_revision_id == _mod_revision.NULL_REVISION:
954
951
new_inv = self._create_inv_from_null(delta, new_revision_id)
955
952
if new_inv.root_id is None:
956
953
raise errors.RootMissing()
957
954
inv_lines = new_inv.to_lines()
958
955
return self._inventory_add_lines(new_revision_id, parents,
959
inv_lines, check_content=False), new_inv
956
inv_lines, check_content=False), new_inv
961
958
basis_tree = self.revision_tree(basis_revision_id)
962
959
basis_tree.lock_read()
963
960
basis_inv = basis_tree.root_inventory
965
962
result = basis_inv.create_by_apply_delta(delta, new_revision_id,
966
propagate_caches=propagate_caches)
963
propagate_caches=propagate_caches)
967
964
inv_lines = result.to_lines()
968
965
return self._inventory_add_lines(new_revision_id, parents,
969
inv_lines, check_content=False), result
966
inv_lines, check_content=False), result
971
968
if basis_tree is not None:
972
969
basis_tree.unlock()
974
def _deserialise_inventory(self, revision_id, lines):
975
return inventory.CHKInventory.deserialise(self.chk_bytes, lines,
971
def _deserialise_inventory(self, revision_id, bytes):
972
return inventory.CHKInventory.deserialise(self.chk_bytes, bytes,
978
975
def _iter_inventories(self, revision_ids, ordering):
979
976
"""Iterate over many inventory objects."""
1221
1224
uninteresting_pid_root_keys.add(
1222
1225
inv.parent_id_basename_to_file_id.key())
1223
1226
chk_bytes = self.from_repository.chk_bytes
1225
1227
def _filter_id_to_entry():
1226
1228
interesting_nodes = chk_map.iter_interesting_nodes(chk_bytes,
1227
self._chk_id_roots, uninteresting_root_keys)
1229
self._chk_id_roots, uninteresting_root_keys)
1228
1230
for record in _filter_text_keys(interesting_nodes, self._text_keys,
1229
chk_map._bytes_to_text_key):
1231
chk_map._bytes_to_text_key):
1230
1232
if record is not None:
1233
1235
self._chk_id_roots = None
1234
1236
yield 'chk_bytes', _filter_id_to_entry()
1236
1237
def _get_parent_id_basename_to_file_id_pages():
1237
1238
for record, items in chk_map.iter_interesting_nodes(chk_bytes,
1238
self._chk_p_id_roots, uninteresting_pid_root_keys):
1239
self._chk_p_id_roots, uninteresting_pid_root_keys):
1239
1240
if record is not None:
1264
1265
revision_ids = search.get_keys()
1265
with ui.ui_factory.nested_progress_bar() as pb:
1266
rc = self._record_counter
1267
self._record_counter.setup(len(revision_ids))
1268
for stream_info in self._fetch_revision_texts(revision_ids):
1269
yield (stream_info[0],
1270
wrap_and_count(pb, rc, stream_info[1]))
1271
self._revision_keys = [(rev_id,) for rev_id in revision_ids]
1272
# TODO: The keys to exclude might be part of the search recipe
1273
# For now, exclude all parents that are at the edge of ancestry, for
1274
# which we have inventories
1275
from_repo = self.from_repository
1276
parent_keys = from_repo._find_parent_keys_of_revisions(
1277
self._revision_keys)
1278
self.from_repository.revisions.clear_cache()
1279
self.from_repository.signatures.clear_cache()
1280
# Clear the repo's get_parent_map cache too.
1281
self.from_repository._unstacked_provider.disable_cache()
1282
self.from_repository._unstacked_provider.enable_cache()
1283
s = self._get_inventory_stream(self._revision_keys)
1284
yield (s[0], wrap_and_count(pb, rc, s[1]))
1285
self.from_repository.inventories.clear_cache()
1286
for stream_info in self._get_filtered_chk_streams(parent_keys):
1287
yield (stream_info[0], wrap_and_count(pb, rc, stream_info[1]))
1288
self.from_repository.chk_bytes.clear_cache()
1289
s = self._get_text_stream()
1290
yield (s[0], wrap_and_count(pb, rc, s[1]))
1291
self.from_repository.texts.clear_cache()
1292
pb.update('Done', rc.max, rc.max)
1266
pb = ui.ui_factory.nested_progress_bar()
1267
rc = self._record_counter
1268
self._record_counter.setup(len(revision_ids))
1269
for stream_info in self._fetch_revision_texts(revision_ids):
1270
yield (stream_info[0],
1271
wrap_and_count(pb, rc, stream_info[1]))
1272
self._revision_keys = [(rev_id,) for rev_id in revision_ids]
1273
# TODO: The keys to exclude might be part of the search recipe
1274
# For now, exclude all parents that are at the edge of ancestry, for
1275
# which we have inventories
1276
from_repo = self.from_repository
1277
parent_keys = from_repo._find_parent_keys_of_revisions(
1278
self._revision_keys)
1279
self.from_repository.revisions.clear_cache()
1280
self.from_repository.signatures.clear_cache()
1281
# Clear the repo's get_parent_map cache too.
1282
self.from_repository._unstacked_provider.disable_cache()
1283
self.from_repository._unstacked_provider.enable_cache()
1284
s = self._get_inventory_stream(self._revision_keys)
1285
yield (s[0], wrap_and_count(pb, rc, s[1]))
1286
self.from_repository.inventories.clear_cache()
1287
for stream_info in self._get_filtered_chk_streams(parent_keys):
1288
yield (stream_info[0], wrap_and_count(pb, rc, stream_info[1]))
1289
self.from_repository.chk_bytes.clear_cache()
1290
s = self._get_text_stream()
1291
yield (s[0], wrap_and_count(pb, rc, s[1]))
1292
self.from_repository.texts.clear_cache()
1293
pb.update('Done', rc.max, rc.max)
1294
1296
def get_stream_for_missing_keys(self, missing_keys):
1295
1297
# missing keys can only occur when we are byte copying and not
1379
1381
# multiple in-a-row (and sharing strings). Topological is better
1380
1382
# for remote, because we access less data.
1381
1383
_fetch_order = 'unordered'
1382
# essentially ignored by the groupcompress code.
1383
_fetch_uses_deltas = False
1384
_fetch_uses_deltas = False # essentially ignored by the groupcompress code.
1384
1385
fast_deltas = True
1385
1386
pack_compresses = True
1386
supports_tree_reference = True
1388
1388
def _get_matching_bzrdir(self):
1389
return controldir.format_registry.make_controldir('2a')
1389
return controldir.format_registry.make_bzrdir('2a')
1391
1391
def _ignore_setting_bzrdir(self, format):
1394
_matchingcontroldir = property(
1395
_get_matching_bzrdir, _ignore_setting_bzrdir)
1394
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
1398
1397
def get_format_string(cls):
1399
return b'Bazaar repository format 2a (needs bzr 1.16 or later)\n'
1398
return ('Bazaar repository format 2a (needs bzr 1.16 or later)\n')
1401
1400
def get_format_description(self):
1402
1401
"""See RepositoryFormat.get_format_description()."""
1403
1402
return ("Repository format 2a - rich roots, group compression"
1404
" and chk inventories")
1403
" and chk inventories")
1407
1406
class RepositoryFormat2aSubtree(RepositoryFormat2a):