335
425
if self.do_cache:
336
426
PyDict_SetItem(self._known_heads, heads_key, heads)
430
"""Return the nodes in topological order.
432
All parents must occur before all children.
434
# This is, for the most part, the same iteration order that we used for
435
# _find_gdfo, consider finding a way to remove the duplication
436
# In general, we find the 'tails' (nodes with no parents), and then
437
# walk to the children. For children that have all of their parents
438
# yielded, we queue up the child to be yielded as well.
439
cdef _KnownGraphNode node
440
cdef _KnownGraphNode child
444
cdef Py_ssize_t last_item
446
pending = self._find_tails()
447
if PyList_GET_SIZE(pending) == 0 and len(self._nodes) > 0:
448
raise errors.GraphCycleError(self._nodes)
452
last_item = PyList_GET_SIZE(pending) - 1
453
while last_item >= 0:
454
# Avoid pop followed by push, instead, peek, and replace
455
# timing shows this is 930ms => 770ms for OOo
456
node = _get_list_node(pending, last_item)
457
last_item = last_item - 1
458
if node.parents is not None:
459
# We don't include ghost parents
460
PyList_Append(topo_order, node.key)
461
for pos from 0 <= pos < PyList_GET_SIZE(node.children):
462
child = _get_list_node(node.children, pos)
464
# We know we have a graph cycle because a node has a parent
465
# which we couldn't find
466
raise errors.GraphCycleError(self._nodes)
467
child.seen = child.seen + 1
468
if child.seen == PyTuple_GET_SIZE(child.parents):
469
# All parents of this child have been yielded, queue this
470
# one to be yielded as well
471
last_item = last_item + 1
472
if last_item < PyList_GET_SIZE(pending):
473
Py_INCREF(child) # SetItem steals a ref
474
PyList_SetItem(pending, last_item, child)
476
PyList_Append(pending, child)
477
# We have queued this node, we don't need to track it
480
# We started from the parents, so we don't need to do anymore work
484
"""Return a reverse topological ordering which is 'stable'.
486
There are a few constraints:
487
1) Reverse topological (all children before all parents)
489
3) 'stable' sorting, so that we get the same result, independent of
490
machine, or extra data.
491
To do this, we use the same basic algorithm as topo_sort, but when we
492
aren't sure what node to access next, we sort them lexicographically.
495
cdef Py_ssize_t pos, last_item
496
cdef _KnownGraphNode node, node2, parent_node
498
tips = self._find_tips()
499
# Split the tips based on prefix
501
for pos from 0 <= pos < PyList_GET_SIZE(tips):
502
node = _get_list_node(tips, pos)
503
if PyString_CheckExact(node.key) or len(node.key) == 1:
507
temp = PyDict_GetItem(prefix_tips, prefix)
509
prefix_tips[prefix] = [node]
511
tip_nodes = <object>temp
512
PyList_Append(tip_nodes, node)
515
for prefix in sorted(prefix_tips):
516
temp = PyDict_GetItem(prefix_tips, prefix)
518
tip_nodes = <object>temp
519
pending = _sort_list_nodes(tip_nodes, 1)
520
last_item = PyList_GET_SIZE(pending) - 1
521
while last_item >= 0:
522
node = _get_list_node(pending, last_item)
523
last_item = last_item - 1
524
if node.parents is None:
527
PyList_Append(result, node.key)
528
# Sorting the parent keys isn't strictly necessary for stable
529
# sorting of a given graph. But it does help minimize the
530
# differences between graphs
531
# For bzr.dev ancestry:
533
# 7.73ms RichCompareBool sort
534
parents = _sort_list_nodes(node.parents, 1)
535
for pos from 0 <= pos < len(parents):
536
if PyTuple_CheckExact(parents):
537
parent_node = _get_tuple_node(parents, pos)
539
parent_node = _get_list_node(parents, pos)
540
# TODO: GraphCycle detection
541
parent_node.seen = parent_node.seen + 1
543
== PyList_GET_SIZE(parent_node.children)):
544
# All children have been processed, queue up this
546
last_item = last_item + 1
547
if last_item < PyList_GET_SIZE(pending):
548
Py_INCREF(parent_node) # SetItem steals a ref
549
PyList_SetItem(pending, last_item, parent_node)
551
PyList_Append(pending, parent_node)
555
def merge_sort(self, tip_key):
556
"""Compute the merge sorted graph output."""
557
cdef _MergeSorter sorter
559
# TODO: consider disabling gc since we are allocating a lot of nodes
560
# that won't be collectable anyway. real world testing has not
561
# shown a specific impact, yet.
562
sorter = _MergeSorter(self, tip_key)
563
return sorter.topo_order()
565
def get_parent_keys(self, key):
566
"""Get the parents for a key
568
Returns a list containg the parents keys. If the key is a ghost,
569
None is returned. A KeyError will be raised if the key is not in
572
:param keys: Key to check (eg revision_id)
573
:return: A list of parents
575
return self._nodes[key].parent_keys
577
def get_child_keys(self, key):
578
"""Get the children for a key
580
Returns a list containg the children keys. A KeyError will be raised
581
if the key is not in the graph.
583
:param keys: Key to check (eg revision_id)
584
:return: A list of children
586
return self._nodes[key].child_keys
589
cdef class _MergeSortNode:
590
"""Tracks information about a node during the merge_sort operation."""
593
cdef public object key
594
cdef public long merge_depth
595
cdef public object end_of_merge # True/False Is this the end of the current merge
597
# Private api, used while computing the information
598
cdef _KnownGraphNode left_parent
599
cdef _KnownGraphNode left_pending_parent
600
cdef object pending_parents # list of _KnownGraphNode for non-left parents
601
cdef long _revno_first
602
cdef long _revno_second
603
cdef long _revno_last
604
# TODO: turn these into flag/bit fields rather than individual members
605
cdef int is_first_child # Is this the first child?
606
cdef int seen_by_child # A child node has seen this parent
607
cdef int completed # Fully Processed
609
def __init__(self, key):
611
self.merge_depth = -1
612
self.left_parent = None
613
self.left_pending_parent = None
614
self.pending_parents = None
615
self._revno_first = -1
616
self._revno_second = -1
617
self._revno_last = -1
618
self.is_first_child = 0
619
self.seen_by_child = 0
623
return '%s(%s depth:%s rev:%s,%s,%s first:%s seen:%s)' % (
624
self.__class__.__name__, self.key,
626
self._revno_first, self._revno_second, self._revno_last,
627
self.is_first_child, self.seen_by_child)
629
cdef int has_pending_parents(self):
630
if self.left_pending_parent is not None or self.pending_parents:
634
cdef object _revno(self):
635
if self._revno_first == -1:
636
if self._revno_second != -1:
637
raise RuntimeError('Something wrong with: %s' % (self,))
638
return (self._revno_last,)
640
return (self._revno_first, self._revno_second, self._revno_last)
647
cdef class _MergeSorter:
648
"""This class does the work of computing the merge_sort ordering.
650
We have some small advantages, in that we get all the extra information
651
that KnownGraph knows, like knowing the child lists, etc.
654
# Current performance numbers for merge_sort(bzr_dev_parent_map):
655
# 302ms tsort.merge_sort()
656
# 91ms graph.KnownGraph().merge_sort()
657
# 40ms kg.merge_sort()
659
cdef KnownGraph graph
660
cdef object _depth_first_stack # list
661
cdef Py_ssize_t _last_stack_item # offset to last item on stack
662
# cdef object _ms_nodes # dict of key => _MergeSortNode
663
cdef object _revno_to_branch_count # {revno => num child branches}
664
cdef object _scheduled_nodes # List of nodes ready to be yielded
666
def __init__(self, known_graph, tip_key):
667
cdef _KnownGraphNode node
669
self.graph = known_graph
670
# self._ms_nodes = {}
671
self._revno_to_branch_count = {}
672
self._depth_first_stack = []
673
self._last_stack_item = -1
674
self._scheduled_nodes = []
675
if (tip_key is not None and tip_key != NULL_REVISION
676
and tip_key != (NULL_REVISION,)):
677
node = self.graph._nodes[tip_key]
678
self._push_node(node, 0)
680
cdef _MergeSortNode _get_ms_node(self, _KnownGraphNode node):
681
cdef PyObject *temp_node
682
cdef _MergeSortNode ms_node
684
if node.extra is None:
685
ms_node = _MergeSortNode(node.key)
688
ms_node = <_MergeSortNode>node.extra
691
cdef _push_node(self, _KnownGraphNode node, long merge_depth):
692
cdef _KnownGraphNode parent_node
693
cdef _MergeSortNode ms_node, ms_parent_node
696
ms_node = self._get_ms_node(node)
697
ms_node.merge_depth = merge_depth
698
if node.parents is None:
699
raise RuntimeError('ghost nodes should not be pushed'
700
' onto the stack: %s' % (node,))
701
if PyTuple_GET_SIZE(node.parents) > 0:
702
parent_node = _get_tuple_node(node.parents, 0)
703
ms_node.left_parent = parent_node
704
if parent_node.parents is None: # left-hand ghost
705
ms_node.left_pending_parent = None
706
ms_node.left_parent = None
708
ms_node.left_pending_parent = parent_node
709
if PyTuple_GET_SIZE(node.parents) > 1:
710
ms_node.pending_parents = []
711
for pos from 1 <= pos < PyTuple_GET_SIZE(node.parents):
712
parent_node = _get_tuple_node(node.parents, pos)
713
if parent_node.parents is None: # ghost
715
PyList_Append(ms_node.pending_parents, parent_node)
717
ms_node.is_first_child = 1
718
if ms_node.left_parent is not None:
719
ms_parent_node = self._get_ms_node(ms_node.left_parent)
720
if ms_parent_node.seen_by_child:
721
ms_node.is_first_child = 0
722
ms_parent_node.seen_by_child = 1
723
self._last_stack_item = self._last_stack_item + 1
724
if self._last_stack_item < PyList_GET_SIZE(self._depth_first_stack):
725
Py_INCREF(node) # SetItem steals a ref
726
PyList_SetItem(self._depth_first_stack, self._last_stack_item,
729
PyList_Append(self._depth_first_stack, node)
731
cdef _pop_node(self):
733
cdef _MergeSortNode ms_node, ms_parent_node, ms_prev_node
734
cdef _KnownGraphNode node, parent_node, prev_node
736
node = _get_list_node(self._depth_first_stack, self._last_stack_item)
737
ms_node = <_MergeSortNode>node.extra
738
self._last_stack_item = self._last_stack_item - 1
739
if ms_node.left_parent is not None:
740
# Assign the revision number from the left-hand parent
741
ms_parent_node = <_MergeSortNode>ms_node.left_parent.extra
742
if ms_node.is_first_child:
743
# First child just increments the final digit
744
ms_node._revno_first = ms_parent_node._revno_first
745
ms_node._revno_second = ms_parent_node._revno_second
746
ms_node._revno_last = ms_parent_node._revno_last + 1
748
# Not the first child, make a new branch
749
# (mainline_revno, branch_count, 1)
750
if ms_parent_node._revno_first == -1:
751
# Mainline ancestor, the increment is on the last digit
752
base_revno = ms_parent_node._revno_last
754
base_revno = ms_parent_node._revno_first
755
temp = PyDict_GetItem(self._revno_to_branch_count,
760
branch_count = (<object>temp) + 1
761
PyDict_SetItem(self._revno_to_branch_count, base_revno,
763
ms_node._revno_first = base_revno
764
ms_node._revno_second = branch_count
765
ms_node._revno_last = 1
767
temp = PyDict_GetItem(self._revno_to_branch_count, 0)
769
# The first root node doesn't have a 3-digit revno
771
ms_node._revno_first = -1
772
ms_node._revno_second = -1
773
ms_node._revno_last = 1
775
root_count = (<object>temp) + 1
776
ms_node._revno_first = 0
777
ms_node._revno_second = root_count
778
ms_node._revno_last = 1
779
PyDict_SetItem(self._revno_to_branch_count, 0, root_count)
780
ms_node.completed = 1
781
if PyList_GET_SIZE(self._scheduled_nodes) == 0:
782
# The first scheduled node is always the end of merge
783
ms_node.end_of_merge = True
785
prev_node = _get_list_node(self._scheduled_nodes,
786
PyList_GET_SIZE(self._scheduled_nodes) - 1)
787
ms_prev_node = <_MergeSortNode>prev_node.extra
788
if ms_prev_node.merge_depth < ms_node.merge_depth:
789
# The previously pushed node is to our left, so this is the end
790
# of this right-hand chain
791
ms_node.end_of_merge = True
792
elif (ms_prev_node.merge_depth == ms_node.merge_depth
793
and prev_node not in node.parents):
794
# The next node is not a direct parent of this node
795
ms_node.end_of_merge = True
797
ms_node.end_of_merge = False
798
PyList_Append(self._scheduled_nodes, node)
800
cdef _schedule_stack(self):
801
cdef _KnownGraphNode last_node, next_node
802
cdef _MergeSortNode ms_node, ms_last_node, ms_next_node
803
cdef long next_merge_depth
805
while self._last_stack_item >= 0:
806
# Peek at the last item on the stack
807
last_node = _get_list_node(self._depth_first_stack,
808
self._last_stack_item)
809
if last_node.gdfo == -1:
810
# if _find_gdfo skipped a node, that means there is a graph
811
# cycle, error out now
812
raise errors.GraphCycleError(self.graph._nodes)
813
ms_last_node = <_MergeSortNode>last_node.extra
814
if not ms_last_node.has_pending_parents():
815
# Processed all parents, pop this node
818
while ms_last_node.has_pending_parents():
819
if ms_last_node.left_pending_parent is not None:
820
# recurse depth first into the primary parent
821
next_node = ms_last_node.left_pending_parent
822
ms_last_node.left_pending_parent = None
824
# place any merges in right-to-left order for scheduling
825
# which gives us left-to-right order after we reverse
826
# the scheduled queue.
827
# Note: This has the effect of allocating common-new
828
# revisions to the right-most subtree rather than the
829
# left most, which will display nicely (you get
830
# smaller trees at the top of the combined merge).
831
next_node = ms_last_node.pending_parents.pop()
832
ms_next_node = self._get_ms_node(next_node)
833
if ms_next_node.completed:
834
# this parent was completed by a child on the
835
# call stack. skip it.
837
# otherwise transfer it from the source graph into the
838
# top of the current depth first search stack.
840
if next_node is ms_last_node.left_parent:
841
next_merge_depth = ms_last_node.merge_depth
843
next_merge_depth = ms_last_node.merge_depth + 1
844
self._push_node(next_node, next_merge_depth)
845
# and do not continue processing parents until this 'call'
849
cdef topo_order(self):
850
cdef _MergeSortNode ms_node
851
cdef _KnownGraphNode node
853
cdef PyObject *temp_key, *temp_node
855
# Note: allocating a _MergeSortNode and deallocating it for all nodes
856
# costs approx 8.52ms (21%) of the total runtime
857
# We might consider moving the attributes into the base
859
self._schedule_stack()
861
# We've set up the basic schedule, now we can continue processing the
863
# Note: This final loop costs us 40.0ms => 28.8ms (11ms, 25%) on
864
# bzr.dev, to convert the internal Object representation into a
865
# Tuple representation...
866
# 2ms is walking the data and computing revno tuples
867
# 7ms is computing the return tuple
868
# 4ms is PyList_Append()
870
# output the result in reverse order, and separate the generated info
871
for pos from PyList_GET_SIZE(self._scheduled_nodes) > pos >= 0:
872
node = _get_list_node(self._scheduled_nodes, pos)
873
ms_node = <_MergeSortNode>node.extra
874
PyList_Append(ordered, ms_node)
876
# Clear out the scheduled nodes now that we're done
877
self._scheduled_nodes = []