509
335
if self.do_cache:
510
336
PyDict_SetItem(self._known_heads, heads_key, heads)
514
"""Return the nodes in topological order.
516
All parents must occur before all children.
518
# This is, for the most part, the same iteration order that we used for
519
# _find_gdfo, consider finding a way to remove the duplication
520
# In general, we find the 'tails' (nodes with no parents), and then
521
# walk to the children. For children that have all of their parents
522
# yielded, we queue up the child to be yielded as well.
523
cdef _KnownGraphNode node
524
cdef _KnownGraphNode child
528
cdef Py_ssize_t last_item
530
pending = self._find_tails()
531
if PyList_GET_SIZE(pending) == 0 and len(self._nodes) > 0:
532
raise errors.GraphCycleError(self._nodes)
536
last_item = PyList_GET_SIZE(pending) - 1
537
while last_item >= 0:
538
# Avoid pop followed by push, instead, peek, and replace
539
# timing shows this is 930ms => 770ms for OOo
540
node = _get_list_node(pending, last_item)
541
last_item = last_item - 1
542
if node.parents is not None:
543
# We don't include ghost parents
544
PyList_Append(topo_order, node.key)
545
for pos from 0 <= pos < PyList_GET_SIZE(node.children):
546
child = _get_list_node(node.children, pos)
548
# We know we have a graph cycle because a node has a parent
549
# which we couldn't find
550
raise errors.GraphCycleError(self._nodes)
551
child.seen = child.seen + 1
552
if child.seen == PyTuple_GET_SIZE(child.parents):
553
# All parents of this child have been yielded, queue this
554
# one to be yielded as well
555
last_item = last_item + 1
556
if last_item < PyList_GET_SIZE(pending):
557
Py_INCREF(child) # SetItem steals a ref
558
PyList_SetItem(pending, last_item, child)
560
PyList_Append(pending, child)
561
# We have queued this node, we don't need to track it
564
# We started from the parents, so we don't need to do anymore work
568
"""Return a reverse topological ordering which is 'stable'.
570
There are a few constraints:
571
1) Reverse topological (all children before all parents)
573
3) 'stable' sorting, so that we get the same result, independent of
574
machine, or extra data.
575
To do this, we use the same basic algorithm as topo_sort, but when we
576
aren't sure what node to access next, we sort them lexicographically.
579
cdef Py_ssize_t pos, last_item
580
cdef _KnownGraphNode node, node2, parent_node
582
tips = self._find_tips()
583
# Split the tips based on prefix
585
for pos from 0 <= pos < PyList_GET_SIZE(tips):
586
node = _get_list_node(tips, pos)
587
if PyBytes_CheckExact(node.key) or len(node.key) == 1:
591
temp = PyDict_GetItem(prefix_tips, prefix)
593
prefix_tips[prefix] = [node]
595
tip_nodes = <object>temp
596
PyList_Append(tip_nodes, node)
599
for prefix in sorted(prefix_tips):
600
temp = PyDict_GetItem(prefix_tips, prefix)
602
tip_nodes = <object>temp
603
pending = _sort_list_nodes(tip_nodes, 1)
604
last_item = PyList_GET_SIZE(pending) - 1
605
while last_item >= 0:
606
node = _get_list_node(pending, last_item)
607
last_item = last_item - 1
608
if node.parents is None:
611
PyList_Append(result, node.key)
612
# Sorting the parent keys isn't strictly necessary for stable
613
# sorting of a given graph. But it does help minimize the
614
# differences between graphs
615
# For bzr.dev ancestry:
617
# 7.73ms RichCompareBool sort
618
parents = _sort_list_nodes(node.parents, 1)
619
for pos from 0 <= pos < len(parents):
620
if PyTuple_CheckExact(parents):
621
parent_node = _get_tuple_node(parents, pos)
623
parent_node = _get_list_node(parents, pos)
624
# TODO: GraphCycle detection
625
parent_node.seen = parent_node.seen + 1
627
== PyList_GET_SIZE(parent_node.children)):
628
# All children have been processed, queue up this
630
last_item = last_item + 1
631
if last_item < PyList_GET_SIZE(pending):
632
Py_INCREF(parent_node) # SetItem steals a ref
633
PyList_SetItem(pending, last_item, parent_node)
635
PyList_Append(pending, parent_node)
639
def merge_sort(self, tip_key):
640
"""Compute the merge sorted graph output."""
641
cdef _MergeSorter sorter
643
# TODO: consider disabling gc since we are allocating a lot of nodes
644
# that won't be collectable anyway. real world testing has not
645
# shown a specific impact, yet.
646
sorter = _MergeSorter(self, tip_key)
647
return sorter.topo_order()
649
def get_parent_keys(self, key):
650
"""Get the parents for a key
652
Returns a list containing the parents keys. If the key is a ghost,
653
None is returned. A KeyError will be raised if the key is not in
656
:param keys: Key to check (eg revision_id)
657
:return: A list of parents
659
return self._nodes[key].parent_keys
661
def get_child_keys(self, key):
662
"""Get the children for a key
664
Returns a list containing the children keys. A KeyError will be raised
665
if the key is not in the graph.
667
:param keys: Key to check (eg revision_id)
668
:return: A list of children
670
return self._nodes[key].child_keys
673
cdef class _MergeSortNode:
674
"""Tracks information about a node during the merge_sort operation."""
677
cdef public object key
678
cdef public long merge_depth
679
cdef public object end_of_merge # True/False Is this the end of the current merge
681
# Private api, used while computing the information
682
cdef _KnownGraphNode left_parent
683
cdef _KnownGraphNode left_pending_parent
684
cdef object pending_parents # list of _KnownGraphNode for non-left parents
685
cdef long _revno_first
686
cdef long _revno_second
687
cdef long _revno_last
688
# TODO: turn these into flag/bit fields rather than individual members
689
cdef int is_first_child # Is this the first child?
690
cdef int seen_by_child # A child node has seen this parent
691
cdef int completed # Fully Processed
693
def __init__(self, key):
695
self.merge_depth = -1
696
self.left_parent = None
697
self.left_pending_parent = None
698
self.pending_parents = None
699
self._revno_first = -1
700
self._revno_second = -1
701
self._revno_last = -1
702
self.is_first_child = 0
703
self.seen_by_child = 0
707
return '%s(%s depth:%s rev:%s,%s,%s first:%s seen:%s)' % (
708
self.__class__.__name__, self.key,
710
self._revno_first, self._revno_second, self._revno_last,
711
self.is_first_child, self.seen_by_child)
713
cdef int has_pending_parents(self): # cannot_raise
714
if self.left_pending_parent is not None or self.pending_parents:
718
cdef object _revno(self):
719
if self._revno_first == -1:
720
if self._revno_second != -1:
721
raise RuntimeError('Something wrong with: %s' % (self,))
722
return (self._revno_last,)
724
return (self._revno_first, self._revno_second, self._revno_last)
731
cdef class _MergeSorter:
732
"""This class does the work of computing the merge_sort ordering.
734
We have some small advantages, in that we get all the extra information
735
that KnownGraph knows, like knowing the child lists, etc.
738
# Current performance numbers for merge_sort(bzr_dev_parent_map):
739
# 302ms tsort.merge_sort()
740
# 91ms graph.KnownGraph().merge_sort()
741
# 40ms kg.merge_sort()
743
cdef KnownGraph graph
744
cdef object _depth_first_stack # list
745
cdef Py_ssize_t _last_stack_item # offset to last item on stack
746
# cdef object _ms_nodes # dict of key => _MergeSortNode
747
cdef object _revno_to_branch_count # {revno => num child branches}
748
cdef object _scheduled_nodes # List of nodes ready to be yielded
750
def __init__(self, known_graph, tip_key):
751
cdef _KnownGraphNode node
753
self.graph = known_graph
754
# self._ms_nodes = {}
755
self._revno_to_branch_count = {}
756
self._depth_first_stack = []
757
self._last_stack_item = -1
758
self._scheduled_nodes = []
759
if (tip_key is not None and tip_key != NULL_REVISION
760
and tip_key != (NULL_REVISION,)):
761
node = self.graph._nodes[tip_key]
762
self._push_node(node, 0)
764
cdef _MergeSortNode _get_ms_node(self, _KnownGraphNode node):
765
cdef PyObject *temp_node
766
cdef _MergeSortNode ms_node
768
if node.extra is None:
769
ms_node = _MergeSortNode(node.key)
772
ms_node = <_MergeSortNode>node.extra
775
cdef _push_node(self, _KnownGraphNode node, long merge_depth):
776
cdef _KnownGraphNode parent_node
777
cdef _MergeSortNode ms_node, ms_parent_node
780
ms_node = self._get_ms_node(node)
781
ms_node.merge_depth = merge_depth
782
if node.parents is None:
783
raise RuntimeError('ghost nodes should not be pushed'
784
' onto the stack: %s' % (node,))
785
if PyTuple_GET_SIZE(node.parents) > 0:
786
parent_node = _get_tuple_node(node.parents, 0)
787
ms_node.left_parent = parent_node
788
if parent_node.parents is None: # left-hand ghost
789
ms_node.left_pending_parent = None
790
ms_node.left_parent = None
792
ms_node.left_pending_parent = parent_node
793
if PyTuple_GET_SIZE(node.parents) > 1:
794
ms_node.pending_parents = []
795
for pos from 1 <= pos < PyTuple_GET_SIZE(node.parents):
796
parent_node = _get_tuple_node(node.parents, pos)
797
if parent_node.parents is None: # ghost
799
PyList_Append(ms_node.pending_parents, parent_node)
801
ms_node.is_first_child = 1
802
if ms_node.left_parent is not None:
803
ms_parent_node = self._get_ms_node(ms_node.left_parent)
804
if ms_parent_node.seen_by_child:
805
ms_node.is_first_child = 0
806
ms_parent_node.seen_by_child = 1
807
self._last_stack_item = self._last_stack_item + 1
808
if self._last_stack_item < PyList_GET_SIZE(self._depth_first_stack):
809
Py_INCREF(node) # SetItem steals a ref
810
PyList_SetItem(self._depth_first_stack, self._last_stack_item,
813
PyList_Append(self._depth_first_stack, node)
815
cdef _pop_node(self):
817
cdef _MergeSortNode ms_node, ms_parent_node, ms_prev_node
818
cdef _KnownGraphNode node, parent_node, prev_node
820
node = _get_list_node(self._depth_first_stack, self._last_stack_item)
821
ms_node = <_MergeSortNode>node.extra
822
self._last_stack_item = self._last_stack_item - 1
823
if ms_node.left_parent is not None:
824
# Assign the revision number from the left-hand parent
825
ms_parent_node = <_MergeSortNode>ms_node.left_parent.extra
826
if ms_node.is_first_child:
827
# First child just increments the final digit
828
ms_node._revno_first = ms_parent_node._revno_first
829
ms_node._revno_second = ms_parent_node._revno_second
830
ms_node._revno_last = ms_parent_node._revno_last + 1
832
# Not the first child, make a new branch
833
# (mainline_revno, branch_count, 1)
834
if ms_parent_node._revno_first == -1:
835
# Mainline ancestor, the increment is on the last digit
836
base_revno = ms_parent_node._revno_last
838
base_revno = ms_parent_node._revno_first
839
temp = PyDict_GetItem(self._revno_to_branch_count,
844
branch_count = (<object>temp) + 1
845
PyDict_SetItem(self._revno_to_branch_count, base_revno,
847
ms_node._revno_first = base_revno
848
ms_node._revno_second = branch_count
849
ms_node._revno_last = 1
851
temp = PyDict_GetItem(self._revno_to_branch_count, 0)
853
# The first root node doesn't have a 3-digit revno
855
ms_node._revno_first = -1
856
ms_node._revno_second = -1
857
ms_node._revno_last = 1
859
root_count = (<object>temp) + 1
860
ms_node._revno_first = 0
861
ms_node._revno_second = root_count
862
ms_node._revno_last = 1
863
PyDict_SetItem(self._revno_to_branch_count, 0, root_count)
864
ms_node.completed = 1
865
if PyList_GET_SIZE(self._scheduled_nodes) == 0:
866
# The first scheduled node is always the end of merge
867
ms_node.end_of_merge = True
869
prev_node = _get_list_node(self._scheduled_nodes,
870
PyList_GET_SIZE(self._scheduled_nodes) - 1)
871
ms_prev_node = <_MergeSortNode>prev_node.extra
872
if ms_prev_node.merge_depth < ms_node.merge_depth:
873
# The previously pushed node is to our left, so this is the end
874
# of this right-hand chain
875
ms_node.end_of_merge = True
876
elif (ms_prev_node.merge_depth == ms_node.merge_depth
877
and prev_node not in node.parents):
878
# The next node is not a direct parent of this node
879
ms_node.end_of_merge = True
881
ms_node.end_of_merge = False
882
PyList_Append(self._scheduled_nodes, node)
884
cdef _schedule_stack(self):
885
cdef _KnownGraphNode last_node, next_node
886
cdef _MergeSortNode ms_node, ms_last_node, ms_next_node
887
cdef long next_merge_depth
889
while self._last_stack_item >= 0:
890
# Peek at the last item on the stack
891
last_node = _get_list_node(self._depth_first_stack,
892
self._last_stack_item)
893
if last_node.gdfo == -1:
894
# if _find_gdfo skipped a node, that means there is a graph
895
# cycle, error out now
896
raise errors.GraphCycleError(self.graph._nodes)
897
ms_last_node = <_MergeSortNode>last_node.extra
898
if not ms_last_node.has_pending_parents():
899
# Processed all parents, pop this node
902
while ms_last_node.has_pending_parents():
903
if ms_last_node.left_pending_parent is not None:
904
# recurse depth first into the primary parent
905
next_node = ms_last_node.left_pending_parent
906
ms_last_node.left_pending_parent = None
908
# place any merges in right-to-left order for scheduling
909
# which gives us left-to-right order after we reverse
910
# the scheduled queue.
911
# Note: This has the effect of allocating common-new
912
# revisions to the right-most subtree rather than the
913
# left most, which will display nicely (you get
914
# smaller trees at the top of the combined merge).
915
next_node = ms_last_node.pending_parents.pop()
916
ms_next_node = self._get_ms_node(next_node)
917
if ms_next_node.completed:
918
# this parent was completed by a child on the
919
# call stack. skip it.
921
# otherwise transfer it from the source graph into the
922
# top of the current depth first search stack.
924
if next_node is ms_last_node.left_parent:
925
next_merge_depth = ms_last_node.merge_depth
927
next_merge_depth = ms_last_node.merge_depth + 1
928
self._push_node(next_node, next_merge_depth)
929
# and do not continue processing parents until this 'call'
933
cdef topo_order(self):
934
cdef _MergeSortNode ms_node
935
cdef _KnownGraphNode node
937
cdef PyObject *temp_key
938
cdef PyObject *temp_node
940
# Note: allocating a _MergeSortNode and deallocating it for all nodes
941
# costs approx 8.52ms (21%) of the total runtime
942
# We might consider moving the attributes into the base
944
self._schedule_stack()
946
# We've set up the basic schedule, now we can continue processing the
948
# Note: This final loop costs us 40.0ms => 28.8ms (11ms, 25%) on
949
# bzr.dev, to convert the internal Object representation into a
950
# Tuple representation...
951
# 2ms is walking the data and computing revno tuples
952
# 7ms is computing the return tuple
953
# 4ms is PyList_Append()
955
# output the result in reverse order, and separate the generated info
956
for pos from PyList_GET_SIZE(self._scheduled_nodes) > pos >= 0:
957
node = _get_list_node(self._scheduled_nodes, pos)
958
ms_node = <_MergeSortNode>node.extra
959
PyList_Append(ordered, ms_node)
961
# Clear out the scheduled nodes now that we're done
962
self._scheduled_nodes = []