25
25
ctypedef struct PyObject:
28
int PyString_CheckExact(object)
30
int PyObject_RichCompareBool(object, object, int)
33
int PyTuple_CheckExact(object)
34
28
object PyTuple_New(Py_ssize_t n)
35
29
Py_ssize_t PyTuple_GET_SIZE(object t)
36
30
PyObject * PyTuple_GET_ITEM(object t, Py_ssize_t o)
37
31
void PyTuple_SET_ITEM(object t, Py_ssize_t o, object v)
39
int PyList_CheckExact(object)
40
33
Py_ssize_t PyList_GET_SIZE(object l)
41
34
PyObject * PyList_GET_ITEM(object l, Py_ssize_t o)
42
35
int PyList_SetItem(object l, Py_ssize_t o, object l) except -1
128
108
return <_KnownGraphNode>temp_node
131
cdef _KnownGraphNode _get_tuple_node(tpl, Py_ssize_t pos):
111
cdef _KnownGraphNode _get_parent(parents, Py_ssize_t pos):
132
112
cdef PyObject *temp_node
113
cdef _KnownGraphNode node
134
temp_node = PyTuple_GET_ITEM(tpl, pos)
115
temp_node = PyTuple_GET_ITEM(parents, pos)
135
116
return <_KnownGraphNode>temp_node
139
cdef _KnownGraphNode real_node
144
cdef object _sort_list_nodes(object lst_or_tpl, int reverse):
145
"""Sort a list of _KnownGraphNode objects.
147
If lst_or_tpl is a list, it is allowed to mutate in place. It may also
148
just return the input list if everything is already sorted.
150
cdef _KnownGraphNode node1, node2
151
cdef int do_swap, is_tuple
152
cdef Py_ssize_t length
154
is_tuple = PyTuple_CheckExact(lst_or_tpl)
155
if not (is_tuple or PyList_CheckExact(lst_or_tpl)):
156
raise TypeError('lst_or_tpl must be a list or tuple.')
157
length = len(lst_or_tpl)
158
if length == 0 or length == 1:
162
node1 = _get_tuple_node(lst_or_tpl, 0)
163
node2 = _get_tuple_node(lst_or_tpl, 1)
165
node1 = _get_list_node(lst_or_tpl, 0)
166
node2 = _get_list_node(lst_or_tpl, 1)
168
do_swap = PyObject_RichCompareBool(node1.key, node2.key, Py_LT)
170
do_swap = PyObject_RichCompareBool(node2.key, node1.key, Py_LT)
174
return (node2, node1)
176
# Swap 'in-place', since lists are mutable
178
PyList_SetItem(lst_or_tpl, 1, node1)
180
PyList_SetItem(lst_or_tpl, 0, node2)
182
# For all other sizes, we just use 'sorted()'
184
# Note that sorted() is just list(iterable).sort()
185
lst_or_tpl = list(lst_or_tpl)
186
lst_or_tpl.sort(key=get_key, reverse=reverse)
190
119
cdef class _MergeSorter
192
121
cdef class KnownGraph:
193
122
"""This is a class which assumes we already know the full graph."""
195
124
cdef public object _nodes
196
cdef public object _known_heads
125
cdef object _known_heads
197
126
cdef public int do_cache
199
128
def __init__(self, parent_map, do_cache=True):
233
162
node = <_KnownGraphNode>temp_node
236
cdef _populate_parents(self, _KnownGraphNode node, parent_keys):
237
cdef Py_ssize_t num_parent_keys, pos
238
cdef _KnownGraphNode parent_node
240
num_parent_keys = len(parent_keys)
241
# We know how many parents, so we pre allocate the tuple
242
parent_nodes = PyTuple_New(num_parent_keys)
243
for pos from 0 <= pos < num_parent_keys:
244
# Note: it costs us 10ms out of 40ms to lookup all of these
245
# parents, it doesn't seem to be an allocation overhead,
246
# but rather a lookup overhead. There doesn't seem to be
247
# a way around it, and that is one reason why
248
# KnownGraphNode maintains a direct pointer to the parent
250
# We use [] because parent_keys may be a tuple or list
251
parent_node = self._get_or_create_node(parent_keys[pos])
252
# PyTuple_SET_ITEM will steal a reference, so INCREF first
253
Py_INCREF(parent_node)
254
PyTuple_SET_ITEM(parent_nodes, pos, parent_node)
255
PyList_Append(parent_node.children, node)
256
node.parents = parent_nodes
258
165
def _initialize_nodes(self, parent_map):
259
166
"""Populate self._nodes.
276
183
while PyDict_Next(parent_map, &pos, &temp_key, &temp_parent_keys):
277
184
key = <object>temp_key
278
185
parent_keys = <object>temp_parent_keys
186
num_parent_keys = len(parent_keys)
279
187
node = self._get_or_create_node(key)
280
self._populate_parents(node, parent_keys)
188
# We know how many parents, so we pre allocate the tuple
189
parent_nodes = PyTuple_New(num_parent_keys)
190
for pos2 from 0 <= pos2 < num_parent_keys:
191
# Note: it costs us 10ms out of 40ms to lookup all of these
192
# parents, it doesn't seem to be an allocation overhead,
193
# but rather a lookup overhead. There doesn't seem to be
194
# a way around it, and that is one reason why
195
# KnownGraphNode maintains a direct pointer to the parent
197
# We use [] because parent_keys may be a tuple or list
198
parent_node = self._get_or_create_node(parent_keys[pos2])
199
# PyTuple_SET_ITEM will steal a reference, so INCREF first
200
Py_INCREF(parent_node)
201
PyTuple_SET_ITEM(parent_nodes, pos2, parent_node)
202
PyList_Append(parent_node.children, node)
203
node.parents = parent_nodes
282
205
def _find_tails(self):
283
206
cdef PyObject *temp_node
293
216
PyList_Append(tails, node)
296
def _find_tips(self):
297
cdef PyObject *temp_node
298
cdef _KnownGraphNode node
303
while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
304
node = <_KnownGraphNode>temp_node
305
if PyList_GET_SIZE(node.children) == 0:
306
PyList_Append(tips, node)
309
219
def _find_gdfo(self):
310
220
cdef _KnownGraphNode node
311
221
cdef _KnownGraphNode child
344
def add_node(self, key, parent_keys):
345
"""Add a new node to the graph.
347
If this fills in a ghost, then the gdfos of all children will be
350
:param key: The node being added. If this is a duplicate, this is a
352
:param parent_keys: The parents of the given node.
353
:return: None (should we return if this was a ghost, etc?)
355
cdef PyObject *maybe_node
356
cdef _KnownGraphNode node, parent_node, child_node
357
cdef long parent_gdfo, next_gdfo
359
maybe_node = PyDict_GetItem(self._nodes, key)
360
if maybe_node != NULL:
361
node = <_KnownGraphNode>maybe_node
362
if node.parents is None:
363
# We are filling in a ghost
364
self._populate_parents(node, parent_keys)
365
# We can't trust cached heads anymore
366
self._known_heads.clear()
367
else: # Ensure that the parent_key list matches
368
existing_parent_keys = []
369
for parent_node in node.parents:
370
existing_parent_keys.append(parent_node.key)
371
# Make sure we use a list for the comparison, in case it was a
373
parent_keys = list(parent_keys)
374
if existing_parent_keys == parent_keys:
375
# Exact match, nothing more to do
378
raise ValueError('Parent key mismatch, existing node %s'
379
' has parents of %s not %s'
380
% (key, existing_parent_keys, parent_keys))
382
node = _KnownGraphNode(key)
383
PyDict_SetItem(self._nodes, key, node)
384
self._populate_parents(node, parent_keys)
386
for parent_node in node.parents:
387
if parent_node.gdfo == -1:
388
# This is a newly introduced ghost, so it gets gdfo of 1
390
if parent_gdfo < parent_node.gdfo:
391
parent_gdfo = parent_node.gdfo
392
node.gdfo = parent_gdfo + 1
393
# Now fill the gdfo to all children
394
# Note that this loop is slightly inefficient, in that we may visit the
395
# same child (and its decendents) more than once, however, it is
396
# 'efficient' in that we only walk to nodes that would be updated,
397
# rather than all nodes
398
# We use a deque rather than a simple list stack, to go for BFD rather
399
# than DFD. So that if a longer path is possible, we walk it before we
400
# get to the final child
401
pending = deque([node])
402
pending_popleft = pending.popleft
403
pending_append = pending.append
405
node = pending_popleft()
406
next_gdfo = node.gdfo + 1
407
for child_node in node.children:
408
if child_node.gdfo < next_gdfo:
409
# This child is being updated, we need to check its
411
child_node.gdfo = next_gdfo
412
pending_append(child_node)
414
254
def heads(self, keys):
415
255
"""Return the heads from amongst keys.
557
397
# We started from the parents, so we don't need to do anymore work
558
398
return topo_order
561
"""Return a reverse topological ordering which is 'stable'.
563
There are a few constraints:
564
1) Reverse topological (all children before all parents)
566
3) 'stable' sorting, so that we get the same result, independent of
567
machine, or extra data.
568
To do this, we use the same basic algorithm as topo_sort, but when we
569
aren't sure what node to access next, we sort them lexicographically.
572
cdef Py_ssize_t pos, last_item
573
cdef _KnownGraphNode node, node2, parent_node
575
tips = self._find_tips()
576
# Split the tips based on prefix
578
for pos from 0 <= pos < PyList_GET_SIZE(tips):
579
node = _get_list_node(tips, pos)
580
if PyString_CheckExact(node.key) or len(node.key) == 1:
584
temp = PyDict_GetItem(prefix_tips, prefix)
586
prefix_tips[prefix] = [node]
588
tip_nodes = <object>temp
589
PyList_Append(tip_nodes, node)
592
for prefix in sorted(prefix_tips):
593
temp = PyDict_GetItem(prefix_tips, prefix)
595
tip_nodes = <object>temp
596
pending = _sort_list_nodes(tip_nodes, 1)
597
last_item = PyList_GET_SIZE(pending) - 1
598
while last_item >= 0:
599
node = _get_list_node(pending, last_item)
600
last_item = last_item - 1
601
if node.parents is None:
604
PyList_Append(result, node.key)
605
# Sorting the parent keys isn't strictly necessary for stable
606
# sorting of a given graph. But it does help minimize the
607
# differences between graphs
608
# For bzr.dev ancestry:
610
# 7.73ms RichCompareBool sort
611
parents = _sort_list_nodes(node.parents, 1)
612
for pos from 0 <= pos < len(parents):
613
if PyTuple_CheckExact(parents):
614
parent_node = _get_tuple_node(parents, pos)
616
parent_node = _get_list_node(parents, pos)
617
# TODO: GraphCycle detection
618
parent_node.seen = parent_node.seen + 1
620
== PyList_GET_SIZE(parent_node.children)):
621
# All children have been processed, queue up this
623
last_item = last_item + 1
624
if last_item < PyList_GET_SIZE(pending):
625
Py_INCREF(parent_node) # SetItem steals a ref
626
PyList_SetItem(pending, last_item, parent_node)
628
PyList_Append(pending, parent_node)
632
401
def merge_sort(self, tip_key):
633
402
"""Compute the merge sorted graph output."""
638
407
# shown a specific impact, yet.
639
408
sorter = _MergeSorter(self, tip_key)
640
409
return sorter.topo_order()
642
def get_parent_keys(self, key):
643
"""Get the parents for a key
645
Returns a list containg the parents keys. If the key is a ghost,
646
None is returned. A KeyError will be raised if the key is not in
649
:param keys: Key to check (eg revision_id)
650
:return: A list of parents
652
return self._nodes[key].parent_keys
654
def get_child_keys(self, key):
655
"""Get the children for a key
657
Returns a list containg the children keys. A KeyError will be raised
658
if the key is not in the graph.
660
:param keys: Key to check (eg revision_id)
661
:return: A list of children
663
return self._nodes[key].child_keys
666
412
cdef class _MergeSortNode:
697
443
self.completed = 0
699
445
def __repr__(self):
700
return '%s(%s depth:%s rev:%s,%s,%s first:%s seen:%s)' % (
701
self.__class__.__name__, self.key,
446
return '%s(depth:%s rev:%s,%s,%s first:%s seen:%s)' % (self.__class__.__name__,
702
447
self.merge_depth,
703
448
self._revno_first, self._revno_second, self._revno_last,
704
449
self.is_first_child, self.seen_by_child)
706
cdef int has_pending_parents(self): # cannot_raise
451
cdef int has_pending_parents(self):
707
452
if self.left_pending_parent is not None or self.pending_parents:
773
519
ms_node = self._get_ms_node(node)
774
520
ms_node.merge_depth = merge_depth
775
if node.parents is None:
776
raise RuntimeError('ghost nodes should not be pushed'
777
' onto the stack: %s' % (node,))
778
521
if PyTuple_GET_SIZE(node.parents) > 0:
779
parent_node = _get_tuple_node(node.parents, 0)
522
parent_node = _get_parent(node.parents, 0)
780
523
ms_node.left_parent = parent_node
781
if parent_node.parents is None: # left-hand ghost
782
ms_node.left_pending_parent = None
783
ms_node.left_parent = None
785
ms_node.left_pending_parent = parent_node
524
ms_node.left_pending_parent = parent_node
786
525
if PyTuple_GET_SIZE(node.parents) > 1:
787
526
ms_node.pending_parents = []
788
527
for pos from 1 <= pos < PyTuple_GET_SIZE(node.parents):
789
parent_node = _get_tuple_node(node.parents, pos)
528
parent_node = _get_parent(node.parents, pos)
790
529
if parent_node.parents is None: # ghost
792
531
PyList_Append(ms_node.pending_parents, parent_node)