924
925
# we have three major tasks here:
925
926
# 1) generate the ideal index
926
927
repo = self._pack_collection.repo
927
ideal_index = repo._generate_text_key_index(self._text_refs)
928
ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for
930
self.new_pack.revision_index.iter_all_entries()])
931
ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
928
932
# 2) generate a text_nodes list that contains all the deltas that can
929
933
# be used as-is, with corrected parents.
966
970
# 3) bulk copy the ok data
967
971
list(self._copy_nodes_graph(ok_nodes, text_index_map,
968
972
self.new_pack._writer, self.new_pack.text_index))
969
# 3) adhoc copy all the other texts.
973
# 4) adhoc copy all the other texts.
974
# We have to topologically insert all texts otherwise we can fail to
975
# reconcile when parts of a single delta chain are preserved intact,
976
# and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
977
# reinserted, and if d3 has incorrect parents it will also be
978
# reinserted. If we insert d3 first, d2 is present (as it was bulk
979
# copied), so we will try to delta, but d2 is not currently able to be
980
# extracted because it's basis d1 is not present. Topologically sorting
981
# addresses this. The following generates a sort for all the texts that
982
# are being inserted without having to reference the entire text key
983
# space (we only topo sort the revisions, which is smaller).
984
topo_order = tsort.topo_sort(ancestors)
985
rev_order = dict(zip(topo_order, range(len(topo_order))))
986
bad_texts.sort(key=lambda key:rev_order[key[0][1]])
970
987
transaction = repo.get_transaction()
971
988
file_id_index = GraphIndexPrefixAdapter(
972
989
self.new_pack.text_index,
1006
1023
knit_index._add_callback = file_id_index.add_nodes
1007
1024
output_knit.add_lines_with_ghosts(
1008
1025
key[1], parents, text_lines, random_id=True, check_content=False)
1009
# 4) check that nothing inserted has a reference outside the keyspace.
1026
# 5) check that nothing inserted has a reference outside the keyspace.
1010
1027
missing_text_keys = self.new_pack._external_compression_parents_of_texts()
1011
1028
if missing_text_keys:
1012
1029
raise errors.BzrError('Reference to missing compression parents %r'