/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2009, 2010 Canonical Ltd
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17
"""Implementation of Graph algorithms when we have already loaded everything.
18
"""
19
7206.3.3 by Jelmer Vernooij
Fix collections import.
20
try:
21
    from collections.abc import deque
22
except ImportError:  # python < 3.7
23
    from collections import deque
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
24
from . import (
4593.5.30 by John Arbash Meinel
Move the topo_sort implementation into KnownGraph, rather than calling back to it.
25
    errors,
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
26
    revision,
27
    )
28
29
30
class _KnownGraphNode(object):
31
    """Represents a single object in the known graph."""
32
4371.4.10 by Vincent Ladeuil
Cleanup.
33
    __slots__ = ('key', 'parent_keys', 'child_keys', 'gdfo')
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
34
35
    def __init__(self, key, parent_keys):
36
        self.key = key
37
        self.parent_keys = parent_keys
38
        self.child_keys = []
39
        # Greatest distance from origin
40
        self.gdfo = None
41
42
    def __repr__(self):
4371.4.10 by Vincent Ladeuil
Cleanup.
43
        return '%s(%s  gdfo:%s par:%s child:%s)' % (
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
44
            self.__class__.__name__, self.key, self.gdfo,
4371.4.10 by Vincent Ladeuil
Cleanup.
45
            self.parent_keys, self.child_keys)
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
46
47
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
48
class _MergeSortNode(object):
49
    """Information about a specific node in the merge graph."""
50
51
    __slots__ = ('key', 'merge_depth', 'revno', 'end_of_merge')
52
53
    def __init__(self, key, merge_depth, revno, end_of_merge):
54
        self.key = key
55
        self.merge_depth = merge_depth
56
        self.revno = revno
57
        self.end_of_merge = end_of_merge
58
59
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
60
class KnownGraph(object):
61
    """This is a class which assumes we already know the full graph."""
62
63
    def __init__(self, parent_map, do_cache=True):
64
        """Create a new KnownGraph instance.
65
66
        :param parent_map: A dictionary mapping key => parent_keys
67
        """
68
        self._nodes = {}
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
69
        # Maps {frozenset(revision_id, revision_id): heads}
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
70
        self._known_heads = {}
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
71
        self.do_cache = do_cache
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
72
        self._initialize_nodes(parent_map)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
73
        self._find_gdfo()
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
74
75
    def _initialize_nodes(self, parent_map):
76
        """Populate self._nodes.
77
4371.4.9 by Vincent Ladeuil
Simplify gdfo computing by finding tails when at graph build time.
78
        After this has finished:
79
        - self._nodes will have an entry for every entry in parent_map.
80
        - ghosts will have a parent_keys = None,
81
        - all nodes found will also have .child_keys populated with all known
82
          child_keys,
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
83
        """
84
        nodes = self._nodes
7479.2.1 by Jelmer Vernooij
Drop python2 support.
85
        for key, parent_keys in parent_map.items():
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
86
            if key in nodes:
87
                node = nodes[key]
88
                node.parent_keys = parent_keys
89
            else:
90
                node = _KnownGraphNode(key, parent_keys)
91
                nodes[key] = node
92
            for parent_key in parent_keys:
93
                try:
94
                    parent_node = nodes[parent_key]
95
                except KeyError:
96
                    parent_node = _KnownGraphNode(parent_key, None)
97
                    nodes[parent_key] = parent_node
98
                parent_node.child_keys.append(key)
99
4454.3.78 by John Arbash Meinel
Found a bug in the pure-python KnownGraph code.
100
    def _find_tails(self):
7479.2.1 by Jelmer Vernooij
Drop python2 support.
101
        return [node for node in self._nodes.values()
4454.3.78 by John Arbash Meinel
Found a bug in the pure-python KnownGraph code.
102
                if not node.parent_keys]
103
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
104
    def _find_tips(self):
7479.2.1 by Jelmer Vernooij
Drop python2 support.
105
        return [node for node in self._nodes.values()
7143.15.2 by Jelmer Vernooij
Run autopep8.
106
                if not node.child_keys]
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
107
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
108
    def _find_gdfo(self):
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
109
        nodes = self._nodes
4371.4.10 by Vincent Ladeuil
Cleanup.
110
        known_parent_gdfos = {}
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
111
        pending = []
112
4454.3.78 by John Arbash Meinel
Found a bug in the pure-python KnownGraph code.
113
        for node in self._find_tails():
4371.4.9 by Vincent Ladeuil
Simplify gdfo computing by finding tails when at graph build time.
114
            node.gdfo = 1
115
            pending.append(node)
116
4371.4.8 by Vincent Ladeuil
Replace the existing KnownGraph._find_gdfo.
117
        while pending:
118
            node = pending.pop()
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
119
            for child_key in node.child_keys:
120
                child = nodes[child_key]
4371.4.19 by John Arbash Meinel
Update the python code to do the same checking around known_parent_gdfos being present.
121
                if child_key in known_parent_gdfos:
122
                    known_gdfo = known_parent_gdfos[child_key] + 1
123
                    present = True
124
                else:
125
                    known_gdfo = 1
126
                    present = False
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
127
                if child.gdfo is None or node.gdfo + 1 > child.gdfo:
128
                    child.gdfo = node.gdfo + 1
4371.4.19 by John Arbash Meinel
Update the python code to do the same checking around known_parent_gdfos being present.
129
                if known_gdfo == len(child.parent_keys):
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
130
                    # We are the last parent updating that node, we can
131
                    # continue from there
4371.4.2 by Vincent Ladeuil
Same gdfo processing, without recursion.
132
                    pending.append(child)
4371.4.19 by John Arbash Meinel
Update the python code to do the same checking around known_parent_gdfos being present.
133
                    if present:
134
                        del known_parent_gdfos[child_key]
135
                else:
136
                    # Update known_parent_gdfos for a key we couldn't process
137
                    known_parent_gdfos[child_key] = known_gdfo
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
138
4838.1.1 by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version.
139
    def add_node(self, key, parent_keys):
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
140
        """Add a new node to the graph.
141
142
        If this fills in a ghost, then the gdfos of all children will be
143
        updated accordingly.
7143.15.2 by Jelmer Vernooij
Run autopep8.
144
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
145
        :param key: The node being added. If this is a duplicate, this is a
146
            no-op.
147
        :param parent_keys: The parents of the given node.
148
        :return: None (should we return if this was a ghost, etc?)
149
        """
4838.1.1 by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version.
150
        nodes = self._nodes
151
        if key in nodes:
152
            node = nodes[key]
153
            if node.parent_keys is None:
154
                node.parent_keys = parent_keys
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
155
                # A ghost is being added, we can no-longer trust the heads
156
                # cache, so clear it
157
                self._known_heads.clear()
4838.1.1 by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version.
158
            else:
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
159
                # Make sure we compare a list to a list, as tuple != list.
160
                parent_keys = list(parent_keys)
161
                existing_parent_keys = list(node.parent_keys)
162
                if parent_keys == existing_parent_keys:
7143.15.2 by Jelmer Vernooij
Run autopep8.
163
                    return  # Identical content
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
164
                else:
7143.15.5 by Jelmer Vernooij
More PEP8 fixes.
165
                    raise ValueError(
166
                        'Parent key mismatch, existing node %s'
167
                        ' has parents of %s not %s'
168
                        % (key, existing_parent_keys, parent_keys))
4838.1.1 by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version.
169
        else:
170
            node = _KnownGraphNode(key, parent_keys)
171
            nodes[key] = node
172
        parent_gdfo = 0
173
        for parent_key in parent_keys:
174
            try:
175
                parent_node = nodes[parent_key]
176
            except KeyError:
177
                parent_node = _KnownGraphNode(parent_key, None)
178
                # Ghosts and roots have gdfo 1
179
                parent_node.gdfo = 1
180
                nodes[parent_key] = parent_node
181
            if parent_gdfo < parent_node.gdfo:
182
                parent_gdfo = parent_node.gdfo
183
            parent_node.child_keys.append(key)
184
        node.gdfo = parent_gdfo + 1
185
        # Now fill the gdfo to all children
186
        # Note that this loop is slightly inefficient, in that we may visit the
187
        # same child (and its decendents) more than once, however, it is
188
        # 'efficient' in that we only walk to nodes that would be updated,
189
        # rather than all nodes
190
        # We use a deque rather than a simple list stack, to go for BFD rather
191
        # than DFD. So that if a longer path is possible, we walk it before we
192
        # get to the final child
7206.3.10 by Jelmer Vernooij
Fix two more tests.
193
        pending = deque([node])
4838.1.1 by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version.
194
        while pending:
195
            node = pending.popleft()
196
            next_gdfo = node.gdfo + 1
197
            for child_key in node.child_keys:
198
                child = nodes[child_key]
199
                if child.gdfo < next_gdfo:
200
                    # This child is being updated, we need to check its
201
                    # children
202
                    child.gdfo = next_gdfo
203
                    pending.append(child)
204
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
205
    def heads(self, keys):
206
        """Return the heads from amongst keys.
207
208
        This is done by searching the ancestries of each key.  Any key that is
209
        reachable from another key is not returned; all the others are.
210
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
211
        This operation scales with the relative depth between any two keys. It
212
        uses gdfo to avoid walking all ancestry.
213
214
        :param keys: An iterable of keys.
215
        :return: A set of the heads. Note that as a set there is no ordering
216
            information. Callers will need to filter their input to create
217
            order if they need it.
218
        """
219
        candidate_nodes = dict((key, self._nodes[key]) for key in keys)
220
        if revision.NULL_REVISION in candidate_nodes:
221
            # NULL_REVISION is only a head if it is the only entry
222
            candidate_nodes.pop(revision.NULL_REVISION)
223
            if not candidate_nodes:
224
                return frozenset([revision.NULL_REVISION])
225
        if len(candidate_nodes) < 2:
226
            # No or only one candidate
227
            return frozenset(candidate_nodes)
228
        heads_key = frozenset(candidate_nodes)
229
        # Do we have a cached result ?
230
        try:
231
            heads = self._known_heads[heads_key]
232
            return heads
233
        except KeyError:
234
            pass
235
        # Let's compute the heads
4371.4.4 by Vincent Ladeuil
Feedback from John, 'seen' is a set().
236
        seen = set()
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
237
        pending = []
238
        min_gdfo = None
7479.2.1 by Jelmer Vernooij
Drop python2 support.
239
        for node in candidate_nodes.values():
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
240
            if node.parent_keys:
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
241
                pending.extend(node.parent_keys)
242
            if min_gdfo is None or node.gdfo < min_gdfo:
243
                min_gdfo = node.gdfo
244
        nodes = self._nodes
245
        while pending:
246
            node_key = pending.pop()
247
            if node_key in seen:
248
                # node already appears in some ancestry
249
                continue
4371.4.4 by Vincent Ladeuil
Feedback from John, 'seen' is a set().
250
            seen.add(node_key)
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
251
            node = nodes[node_key]
252
            if node.gdfo <= min_gdfo:
253
                continue
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
254
            if node.parent_keys:
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
255
                pending.extend(node.parent_keys)
4371.4.4 by Vincent Ladeuil
Feedback from John, 'seen' is a set().
256
        heads = heads_key.difference(seen)
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
257
        if self.do_cache:
258
            self._known_heads[heads_key] = heads
259
        return heads
260
4593.5.2 by John Arbash Meinel
Implement a very basic version of KnownGraph.topo_sort that just
261
    def topo_sort(self):
4593.5.30 by John Arbash Meinel
Move the topo_sort implementation into KnownGraph, rather than calling back to it.
262
        """Return the nodes in topological order.
263
264
        All parents must occur before all children.
265
        """
7479.2.1 by Jelmer Vernooij
Drop python2 support.
266
        for node in self._nodes.values():
4593.5.30 by John Arbash Meinel
Move the topo_sort implementation into KnownGraph, rather than calling back to it.
267
            if node.gdfo is None:
268
                raise errors.GraphCycleError(self._nodes)
269
        pending = self._find_tails()
270
        pending_pop = pending.pop
271
        pending_append = pending.append
272
273
        topo_order = []
274
        topo_order_append = topo_order.append
275
276
        num_seen_parents = dict.fromkeys(self._nodes, 0)
277
        while pending:
278
            node = pending_pop()
279
            if node.parent_keys is not None:
280
                # We don't include ghost parents
281
                topo_order_append(node.key)
282
            for child_key in node.child_keys:
283
                child_node = self._nodes[child_key]
284
                seen_parents = num_seen_parents[child_key] + 1
285
                if seen_parents == len(child_node.parent_keys):
286
                    # All parents have been processed, enqueue this child
287
                    pending_append(child_node)
288
                    # This has been queued up, stop tracking it
289
                    del num_seen_parents[child_key]
290
                else:
291
                    num_seen_parents[child_key] = seen_parents
292
        # We started from the parents, so we don't need to do anymore work
293
        return topo_order
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
294
4641.5.2 by John Arbash Meinel
Get ready to move the tests into KnownGraph implementation tests.
295
    def gc_sort(self):
296
        """Return a reverse topological ordering which is 'stable'.
297
298
        There are a few constraints:
299
          1) Reverse topological (all children before all parents)
300
          2) Grouped by prefix
301
          3) 'stable' sorting, so that we get the same result, independent of
302
             machine, or extra data.
303
        To do this, we use the same basic algorithm as topo_sort, but when we
304
        aren't sure what node to access next, we sort them lexicographically.
305
        """
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
306
        tips = self._find_tips()
307
        # Split the tips based on prefix
308
        prefix_tips = {}
309
        for node in tips:
310
            if node.key.__class__ is str or len(node.key) == 1:
311
                prefix = ''
312
            else:
313
                prefix = node.key[0]
314
            prefix_tips.setdefault(prefix, []).append(node)
315
316
        num_seen_children = dict.fromkeys(self._nodes, 0)
317
318
        result = []
319
        for prefix in sorted(prefix_tips):
7143.15.2 by Jelmer Vernooij
Run autopep8.
320
            pending = sorted(prefix_tips[prefix], key=lambda n: n.key,
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
321
                             reverse=True)
322
            while pending:
323
                node = pending.pop()
4641.5.6 by John Arbash Meinel
Add support for skipping ghost nodes.
324
                if node.parent_keys is None:
325
                    # Ghost node, skip it
326
                    continue
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
327
                result.append(node.key)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
328
                for parent_key in sorted(node.parent_keys, reverse=True):
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
329
                    parent_node = self._nodes[parent_key]
330
                    seen_children = num_seen_children[parent_key] + 1
331
                    if seen_children == len(parent_node.child_keys):
332
                        # All children have been processed, enqueue this parent
333
                        pending.append(parent_node)
334
                        # This has been queued up, stop tracking it
335
                        del num_seen_children[parent_key]
336
                    else:
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
337
                        num_seen_children[parent_key] = seen_children
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
338
        return result
4641.5.2 by John Arbash Meinel
Get ready to move the tests into KnownGraph implementation tests.
339
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
340
    def merge_sort(self, tip_key):
341
        """Compute the merge sorted graph output."""
6622.1.34 by Jelmer Vernooij
Rename brzlib => breezy.
342
        from breezy import tsort
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
343
        as_parent_map = dict((node.key, node.parent_keys)
7479.2.1 by Jelmer Vernooij
Drop python2 support.
344
                             for node in self._nodes.values()
7143.15.2 by Jelmer Vernooij
Run autopep8.
345
                             if node.parent_keys is not None)
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
346
        # We intentionally always generate revnos and never force the
347
        # mainline_revisions
4593.5.32 by John Arbash Meinel
With the new api, we don't return 'sequence_number' anymore.
348
        # Strip the sequence_number that merge_sort generates
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
349
        return [_MergeSortNode(key, merge_depth, revno, end_of_merge)
350
                for _, key, merge_depth, revno, end_of_merge
7143.15.2 by Jelmer Vernooij
Run autopep8.
351
                in tsort.merge_sort(as_parent_map, tip_key,
352
                                    mainline_revisions=None,
353
                                    generate_revno=True)]
354
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
355
    def get_parent_keys(self, key):
356
        """Get the parents for a key
7143.15.2 by Jelmer Vernooij
Run autopep8.
357
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
358
        Returns a list containg the parents keys. If the key is a ghost,
359
        None is returned. A KeyError will be raised if the key is not in
360
        the graph.
7143.15.2 by Jelmer Vernooij
Run autopep8.
361
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
362
        :param keys: Key to check (eg revision_id)
363
        :return: A list of parents
364
        """
365
        return self._nodes[key].parent_keys
366
367
    def get_child_keys(self, key):
368
        """Get the children for a key
7143.15.2 by Jelmer Vernooij
Run autopep8.
369
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
370
        Returns a list containg the children keys. A KeyError will be raised
371
        if the key is not in the graph.
7143.15.2 by Jelmer Vernooij
Run autopep8.
372
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
373
        :param keys: Key to check (eg revision_id)
374
        :return: A list of children
375
        """
376
        return self._nodes[key].child_keys