25
25
ctypedef struct PyObject:
28
int PyString_CheckExact(object)
30
int PyObject_RichCompareBool(object, object, int)
33
int PyTuple_CheckExact(object)
28
34
object PyTuple_New(Py_ssize_t n)
29
35
Py_ssize_t PyTuple_GET_SIZE(object t)
30
36
PyObject * PyTuple_GET_ITEM(object t, Py_ssize_t o)
31
37
void PyTuple_SET_ITEM(object t, Py_ssize_t o, object v)
39
int PyList_CheckExact(object)
33
40
Py_ssize_t PyList_GET_SIZE(object l)
34
41
PyObject * PyList_GET_ITEM(object l, Py_ssize_t o)
35
42
int PyList_SetItem(object l, Py_ssize_t o, object l) except -1
108
128
return <_KnownGraphNode>temp_node
111
cdef _KnownGraphNode _get_parent(parents, Py_ssize_t pos):
131
cdef _KnownGraphNode _get_tuple_node(tpl, Py_ssize_t pos):
112
132
cdef PyObject *temp_node
113
cdef _KnownGraphNode node
115
temp_node = PyTuple_GET_ITEM(parents, pos)
134
temp_node = PyTuple_GET_ITEM(tpl, pos)
116
135
return <_KnownGraphNode>temp_node
139
cdef _KnownGraphNode real_node
144
cdef object _sort_list_nodes(object lst_or_tpl, int reverse):
145
"""Sort a list of _KnownGraphNode objects.
147
If lst_or_tpl is a list, it is allowed to mutate in place. It may also
148
just return the input list if everything is already sorted.
150
cdef _KnownGraphNode node1, node2
151
cdef int do_swap, is_tuple
152
cdef Py_ssize_t length
154
is_tuple = PyTuple_CheckExact(lst_or_tpl)
155
if not (is_tuple or PyList_CheckExact(lst_or_tpl)):
156
raise TypeError('lst_or_tpl must be a list or tuple.')
157
length = len(lst_or_tpl)
158
if length == 0 or length == 1:
162
node1 = _get_tuple_node(lst_or_tpl, 0)
163
node2 = _get_tuple_node(lst_or_tpl, 1)
165
node1 = _get_list_node(lst_or_tpl, 0)
166
node2 = _get_list_node(lst_or_tpl, 1)
168
do_swap = PyObject_RichCompareBool(node1.key, node2.key, Py_LT)
170
do_swap = PyObject_RichCompareBool(node2.key, node1.key, Py_LT)
174
return (node2, node1)
176
# Swap 'in-place', since lists are mutable
178
PyList_SetItem(lst_or_tpl, 1, node1)
180
PyList_SetItem(lst_or_tpl, 0, node2)
182
# For all other sizes, we just use 'sorted()'
184
# Note that sorted() is just list(iterable).sort()
185
lst_or_tpl = list(lst_or_tpl)
186
lst_or_tpl.sort(key=get_key, reverse=reverse)
119
190
cdef class _MergeSorter
121
192
cdef class KnownGraph:
122
193
"""This is a class which assumes we already know the full graph."""
124
195
cdef public object _nodes
125
cdef object _known_heads
196
cdef public object _known_heads
126
197
cdef public int do_cache
128
199
def __init__(self, parent_map, do_cache=True):
162
233
node = <_KnownGraphNode>temp_node
236
cdef _populate_parents(self, _KnownGraphNode node, parent_keys):
237
cdef Py_ssize_t num_parent_keys, pos
238
cdef _KnownGraphNode parent_node
240
num_parent_keys = len(parent_keys)
241
# We know how many parents, so we pre allocate the tuple
242
parent_nodes = PyTuple_New(num_parent_keys)
243
for pos from 0 <= pos < num_parent_keys:
244
# Note: it costs us 10ms out of 40ms to lookup all of these
245
# parents, it doesn't seem to be an allocation overhead,
246
# but rather a lookup overhead. There doesn't seem to be
247
# a way around it, and that is one reason why
248
# KnownGraphNode maintains a direct pointer to the parent
250
# We use [] because parent_keys may be a tuple or list
251
parent_node = self._get_or_create_node(parent_keys[pos])
252
# PyTuple_SET_ITEM will steal a reference, so INCREF first
253
Py_INCREF(parent_node)
254
PyTuple_SET_ITEM(parent_nodes, pos, parent_node)
255
PyList_Append(parent_node.children, node)
256
node.parents = parent_nodes
165
258
def _initialize_nodes(self, parent_map):
166
259
"""Populate self._nodes.
183
276
while PyDict_Next(parent_map, &pos, &temp_key, &temp_parent_keys):
184
277
key = <object>temp_key
185
278
parent_keys = <object>temp_parent_keys
186
num_parent_keys = len(parent_keys)
187
279
node = self._get_or_create_node(key)
188
# We know how many parents, so we pre allocate the tuple
189
parent_nodes = PyTuple_New(num_parent_keys)
190
for pos2 from 0 <= pos2 < num_parent_keys:
191
# Note: it costs us 10ms out of 40ms to lookup all of these
192
# parents, it doesn't seem to be an allocation overhead,
193
# but rather a lookup overhead. There doesn't seem to be
194
# a way around it, and that is one reason why
195
# KnownGraphNode maintains a direct pointer to the parent
197
# We use [] because parent_keys may be a tuple or list
198
parent_node = self._get_or_create_node(parent_keys[pos2])
199
# PyTuple_SET_ITEM will steal a reference, so INCREF first
200
Py_INCREF(parent_node)
201
PyTuple_SET_ITEM(parent_nodes, pos2, parent_node)
202
PyList_Append(parent_node.children, node)
203
node.parents = parent_nodes
280
self._populate_parents(node, parent_keys)
205
282
def _find_tails(self):
206
283
cdef PyObject *temp_node
216
293
PyList_Append(tails, node)
296
def _find_tips(self):
297
cdef PyObject *temp_node
298
cdef _KnownGraphNode node
303
while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
304
node = <_KnownGraphNode>temp_node
305
if PyList_GET_SIZE(node.children) == 0:
306
PyList_Append(tips, node)
219
309
def _find_gdfo(self):
220
310
cdef _KnownGraphNode node
221
311
cdef _KnownGraphNode child
344
def add_node(self, key, parent_keys):
345
"""Add a new node to the graph.
347
If this fills in a ghost, then the gdfos of all children will be
350
:param key: The node being added. If this is a duplicate, this is a
352
:param parent_keys: The parents of the given node.
353
:return: None (should we return if this was a ghost, etc?)
355
cdef PyObject *maybe_node
356
cdef _KnownGraphNode node, parent_node, child_node
357
cdef long parent_gdfo, next_gdfo
359
maybe_node = PyDict_GetItem(self._nodes, key)
360
if maybe_node != NULL:
361
node = <_KnownGraphNode>maybe_node
362
if node.parents is None:
363
# We are filling in a ghost
364
self._populate_parents(node, parent_keys)
365
# We can't trust cached heads anymore
366
self._known_heads.clear()
367
else: # Ensure that the parent_key list matches
368
existing_parent_keys = []
369
for parent_node in node.parents:
370
existing_parent_keys.append(parent_node.key)
371
# Make sure we use a list for the comparison, in case it was a
373
parent_keys = list(parent_keys)
374
if existing_parent_keys == parent_keys:
375
# Exact match, nothing more to do
378
raise ValueError('Parent key mismatch, existing node %s'
379
' has parents of %s not %s'
380
% (key, existing_parent_keys, parent_keys))
382
node = _KnownGraphNode(key)
383
PyDict_SetItem(self._nodes, key, node)
384
self._populate_parents(node, parent_keys)
386
for parent_node in node.parents:
387
if parent_node.gdfo == -1:
388
# This is a newly introduced ghost, so it gets gdfo of 1
390
if parent_gdfo < parent_node.gdfo:
391
parent_gdfo = parent_node.gdfo
392
node.gdfo = parent_gdfo + 1
393
# Now fill the gdfo to all children
394
# Note that this loop is slightly inefficient, in that we may visit the
395
# same child (and its decendents) more than once, however, it is
396
# 'efficient' in that we only walk to nodes that would be updated,
397
# rather than all nodes
398
# We use a deque rather than a simple list stack, to go for BFD rather
399
# than DFD. So that if a longer path is possible, we walk it before we
400
# get to the final child
401
pending = deque([node])
402
pending_popleft = pending.popleft
403
pending_append = pending.append
405
node = pending_popleft()
406
next_gdfo = node.gdfo + 1
407
for child_node in node.children:
408
if child_node.gdfo < next_gdfo:
409
# This child is being updated, we need to check its
411
child_node.gdfo = next_gdfo
412
pending_append(child_node)
254
414
def heads(self, keys):
255
415
"""Return the heads from amongst keys.
397
557
# We started from the parents, so we don't need to do anymore work
398
558
return topo_order
561
"""Return a reverse topological ordering which is 'stable'.
563
There are a few constraints:
564
1) Reverse topological (all children before all parents)
566
3) 'stable' sorting, so that we get the same result, independent of
567
machine, or extra data.
568
To do this, we use the same basic algorithm as topo_sort, but when we
569
aren't sure what node to access next, we sort them lexicographically.
572
cdef Py_ssize_t pos, last_item
573
cdef _KnownGraphNode node, node2, parent_node
575
tips = self._find_tips()
576
# Split the tips based on prefix
578
for pos from 0 <= pos < PyList_GET_SIZE(tips):
579
node = _get_list_node(tips, pos)
580
if PyString_CheckExact(node.key) or len(node.key) == 1:
584
temp = PyDict_GetItem(prefix_tips, prefix)
586
prefix_tips[prefix] = [node]
588
tip_nodes = <object>temp
589
PyList_Append(tip_nodes, node)
592
for prefix in sorted(prefix_tips):
593
temp = PyDict_GetItem(prefix_tips, prefix)
595
tip_nodes = <object>temp
596
pending = _sort_list_nodes(tip_nodes, 1)
597
last_item = PyList_GET_SIZE(pending) - 1
598
while last_item >= 0:
599
node = _get_list_node(pending, last_item)
600
last_item = last_item - 1
601
if node.parents is None:
604
PyList_Append(result, node.key)
605
# Sorting the parent keys isn't strictly necessary for stable
606
# sorting of a given graph. But it does help minimize the
607
# differences between graphs
608
# For bzr.dev ancestry:
610
# 7.73ms RichCompareBool sort
611
parents = _sort_list_nodes(node.parents, 1)
612
for pos from 0 <= pos < len(parents):
613
if PyTuple_CheckExact(parents):
614
parent_node = _get_tuple_node(parents, pos)
616
parent_node = _get_list_node(parents, pos)
617
# TODO: GraphCycle detection
618
parent_node.seen = parent_node.seen + 1
620
== PyList_GET_SIZE(parent_node.children)):
621
# All children have been processed, queue up this
623
last_item = last_item + 1
624
if last_item < PyList_GET_SIZE(pending):
625
Py_INCREF(parent_node) # SetItem steals a ref
626
PyList_SetItem(pending, last_item, parent_node)
628
PyList_Append(pending, parent_node)
401
632
def merge_sort(self, tip_key):
402
633
"""Compute the merge sorted graph output."""
407
638
# shown a specific impact, yet.
408
639
sorter = _MergeSorter(self, tip_key)
409
640
return sorter.topo_order()
642
def get_parent_keys(self, key):
643
"""Get the parents for a key
645
Returns a list containg the parents keys. If the key is a ghost,
646
None is returned. A KeyError will be raised if the key is not in
649
:param keys: Key to check (eg revision_id)
650
:return: A list of parents
652
return self._nodes[key].parent_keys
654
def get_child_keys(self, key):
655
"""Get the children for a key
657
Returns a list containg the children keys. A KeyError will be raised
658
if the key is not in the graph.
660
:param keys: Key to check (eg revision_id)
661
:return: A list of children
663
return self._nodes[key].child_keys
412
666
cdef class _MergeSortNode:
522
776
raise RuntimeError('ghost nodes should not be pushed'
523
777
' onto the stack: %s' % (node,))
524
778
if PyTuple_GET_SIZE(node.parents) > 0:
525
parent_node = _get_parent(node.parents, 0)
779
parent_node = _get_tuple_node(node.parents, 0)
526
780
ms_node.left_parent = parent_node
527
781
if parent_node.parents is None: # left-hand ghost
528
782
ms_node.left_pending_parent = None