/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2009, 2010 Canonical Ltd
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17
"""Implementation of Graph algorithms when we have already loaded everything.
18
"""
19
6379.6.7 by Jelmer Vernooij
Move importing from future until after doc string, otherwise the doc string will disappear.
20
from __future__ import absolute_import
21
7206.3.3 by Jelmer Vernooij
Fix collections import.
22
try:
23
    from collections.abc import deque
24
except ImportError:  # python < 3.7
25
    from collections import deque
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
26
from . import (
4593.5.30 by John Arbash Meinel
Move the topo_sort implementation into KnownGraph, rather than calling back to it.
27
    errors,
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
28
    revision,
29
    )
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
30
from .sixish import (
31
    viewitems,
32
    viewvalues,
33
    )
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
34
35
36
class _KnownGraphNode(object):
37
    """Represents a single object in the known graph."""
38
4371.4.10 by Vincent Ladeuil
Cleanup.
39
    __slots__ = ('key', 'parent_keys', 'child_keys', 'gdfo')
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
40
41
    def __init__(self, key, parent_keys):
42
        self.key = key
43
        self.parent_keys = parent_keys
44
        self.child_keys = []
45
        # Greatest distance from origin
46
        self.gdfo = None
47
48
    def __repr__(self):
4371.4.10 by Vincent Ladeuil
Cleanup.
49
        return '%s(%s  gdfo:%s par:%s child:%s)' % (
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
50
            self.__class__.__name__, self.key, self.gdfo,
4371.4.10 by Vincent Ladeuil
Cleanup.
51
            self.parent_keys, self.child_keys)
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
52
53
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
54
class _MergeSortNode(object):
55
    """Information about a specific node in the merge graph."""
56
57
    __slots__ = ('key', 'merge_depth', 'revno', 'end_of_merge')
58
59
    def __init__(self, key, merge_depth, revno, end_of_merge):
60
        self.key = key
61
        self.merge_depth = merge_depth
62
        self.revno = revno
63
        self.end_of_merge = end_of_merge
64
65
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
66
class KnownGraph(object):
67
    """This is a class which assumes we already know the full graph."""
68
69
    def __init__(self, parent_map, do_cache=True):
70
        """Create a new KnownGraph instance.
71
72
        :param parent_map: A dictionary mapping key => parent_keys
73
        """
74
        self._nodes = {}
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
75
        # Maps {frozenset(revision_id, revision_id): heads}
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
76
        self._known_heads = {}
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
77
        self.do_cache = do_cache
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
78
        self._initialize_nodes(parent_map)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
79
        self._find_gdfo()
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
80
81
    def _initialize_nodes(self, parent_map):
82
        """Populate self._nodes.
83
4371.4.9 by Vincent Ladeuil
Simplify gdfo computing by finding tails when at graph build time.
84
        After this has finished:
85
        - self._nodes will have an entry for every entry in parent_map.
86
        - ghosts will have a parent_keys = None,
87
        - all nodes found will also have .child_keys populated with all known
88
          child_keys,
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
89
        """
90
        nodes = self._nodes
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
91
        for key, parent_keys in viewitems(parent_map):
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
92
            if key in nodes:
93
                node = nodes[key]
94
                node.parent_keys = parent_keys
95
            else:
96
                node = _KnownGraphNode(key, parent_keys)
97
                nodes[key] = node
98
            for parent_key in parent_keys:
99
                try:
100
                    parent_node = nodes[parent_key]
101
                except KeyError:
102
                    parent_node = _KnownGraphNode(parent_key, None)
103
                    nodes[parent_key] = parent_node
104
                parent_node.child_keys.append(key)
105
4454.3.78 by John Arbash Meinel
Found a bug in the pure-python KnownGraph code.
106
    def _find_tails(self):
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
107
        return [node for node in viewvalues(self._nodes)
4454.3.78 by John Arbash Meinel
Found a bug in the pure-python KnownGraph code.
108
                if not node.parent_keys]
109
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
110
    def _find_tips(self):
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
111
        return [node for node in viewvalues(self._nodes)
7143.15.2 by Jelmer Vernooij
Run autopep8.
112
                if not node.child_keys]
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
113
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
114
    def _find_gdfo(self):
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
115
        nodes = self._nodes
4371.4.10 by Vincent Ladeuil
Cleanup.
116
        known_parent_gdfos = {}
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
117
        pending = []
118
4454.3.78 by John Arbash Meinel
Found a bug in the pure-python KnownGraph code.
119
        for node in self._find_tails():
4371.4.9 by Vincent Ladeuil
Simplify gdfo computing by finding tails when at graph build time.
120
            node.gdfo = 1
121
            pending.append(node)
122
4371.4.8 by Vincent Ladeuil
Replace the existing KnownGraph._find_gdfo.
123
        while pending:
124
            node = pending.pop()
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
125
            for child_key in node.child_keys:
126
                child = nodes[child_key]
4371.4.19 by John Arbash Meinel
Update the python code to do the same checking around known_parent_gdfos being present.
127
                if child_key in known_parent_gdfos:
128
                    known_gdfo = known_parent_gdfos[child_key] + 1
129
                    present = True
130
                else:
131
                    known_gdfo = 1
132
                    present = False
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
133
                if child.gdfo is None or node.gdfo + 1 > child.gdfo:
134
                    child.gdfo = node.gdfo + 1
4371.4.19 by John Arbash Meinel
Update the python code to do the same checking around known_parent_gdfos being present.
135
                if known_gdfo == len(child.parent_keys):
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
136
                    # We are the last parent updating that node, we can
137
                    # continue from there
4371.4.2 by Vincent Ladeuil
Same gdfo processing, without recursion.
138
                    pending.append(child)
4371.4.19 by John Arbash Meinel
Update the python code to do the same checking around known_parent_gdfos being present.
139
                    if present:
140
                        del known_parent_gdfos[child_key]
141
                else:
142
                    # Update known_parent_gdfos for a key we couldn't process
143
                    known_parent_gdfos[child_key] = known_gdfo
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
144
4838.1.1 by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version.
145
    def add_node(self, key, parent_keys):
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
146
        """Add a new node to the graph.
147
148
        If this fills in a ghost, then the gdfos of all children will be
149
        updated accordingly.
7143.15.2 by Jelmer Vernooij
Run autopep8.
150
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
151
        :param key: The node being added. If this is a duplicate, this is a
152
            no-op.
153
        :param parent_keys: The parents of the given node.
154
        :return: None (should we return if this was a ghost, etc?)
155
        """
4838.1.1 by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version.
156
        nodes = self._nodes
157
        if key in nodes:
158
            node = nodes[key]
159
            if node.parent_keys is None:
160
                node.parent_keys = parent_keys
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
161
                # A ghost is being added, we can no-longer trust the heads
162
                # cache, so clear it
163
                self._known_heads.clear()
4838.1.1 by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version.
164
            else:
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
165
                # Make sure we compare a list to a list, as tuple != list.
166
                parent_keys = list(parent_keys)
167
                existing_parent_keys = list(node.parent_keys)
168
                if parent_keys == existing_parent_keys:
7143.15.2 by Jelmer Vernooij
Run autopep8.
169
                    return  # Identical content
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
170
                else:
7143.15.5 by Jelmer Vernooij
More PEP8 fixes.
171
                    raise ValueError(
172
                        'Parent key mismatch, existing node %s'
173
                        ' has parents of %s not %s'
174
                        % (key, existing_parent_keys, parent_keys))
4838.1.1 by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version.
175
        else:
176
            node = _KnownGraphNode(key, parent_keys)
177
            nodes[key] = node
178
        parent_gdfo = 0
179
        for parent_key in parent_keys:
180
            try:
181
                parent_node = nodes[parent_key]
182
            except KeyError:
183
                parent_node = _KnownGraphNode(parent_key, None)
184
                # Ghosts and roots have gdfo 1
185
                parent_node.gdfo = 1
186
                nodes[parent_key] = parent_node
187
            if parent_gdfo < parent_node.gdfo:
188
                parent_gdfo = parent_node.gdfo
189
            parent_node.child_keys.append(key)
190
        node.gdfo = parent_gdfo + 1
191
        # Now fill the gdfo to all children
192
        # Note that this loop is slightly inefficient, in that we may visit the
193
        # same child (and its decendents) more than once, however, it is
194
        # 'efficient' in that we only walk to nodes that would be updated,
195
        # rather than all nodes
196
        # We use a deque rather than a simple list stack, to go for BFD rather
197
        # than DFD. So that if a longer path is possible, we walk it before we
198
        # get to the final child
7206.3.10 by Jelmer Vernooij
Fix two more tests.
199
        pending = deque([node])
4838.1.1 by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version.
200
        while pending:
201
            node = pending.popleft()
202
            next_gdfo = node.gdfo + 1
203
            for child_key in node.child_keys:
204
                child = nodes[child_key]
205
                if child.gdfo < next_gdfo:
206
                    # This child is being updated, we need to check its
207
                    # children
208
                    child.gdfo = next_gdfo
209
                    pending.append(child)
210
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
211
    def heads(self, keys):
212
        """Return the heads from amongst keys.
213
214
        This is done by searching the ancestries of each key.  Any key that is
215
        reachable from another key is not returned; all the others are.
216
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
217
        This operation scales with the relative depth between any two keys. It
218
        uses gdfo to avoid walking all ancestry.
219
220
        :param keys: An iterable of keys.
221
        :return: A set of the heads. Note that as a set there is no ordering
222
            information. Callers will need to filter their input to create
223
            order if they need it.
224
        """
225
        candidate_nodes = dict((key, self._nodes[key]) for key in keys)
226
        if revision.NULL_REVISION in candidate_nodes:
227
            # NULL_REVISION is only a head if it is the only entry
228
            candidate_nodes.pop(revision.NULL_REVISION)
229
            if not candidate_nodes:
230
                return frozenset([revision.NULL_REVISION])
231
        if len(candidate_nodes) < 2:
232
            # No or only one candidate
233
            return frozenset(candidate_nodes)
234
        heads_key = frozenset(candidate_nodes)
235
        # Do we have a cached result ?
236
        try:
237
            heads = self._known_heads[heads_key]
238
            return heads
239
        except KeyError:
240
            pass
241
        # Let's compute the heads
4371.4.4 by Vincent Ladeuil
Feedback from John, 'seen' is a set().
242
        seen = set()
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
243
        pending = []
244
        min_gdfo = None
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
245
        for node in viewvalues(candidate_nodes):
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
246
            if node.parent_keys:
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
247
                pending.extend(node.parent_keys)
248
            if min_gdfo is None or node.gdfo < min_gdfo:
249
                min_gdfo = node.gdfo
250
        nodes = self._nodes
251
        while pending:
252
            node_key = pending.pop()
253
            if node_key in seen:
254
                # node already appears in some ancestry
255
                continue
4371.4.4 by Vincent Ladeuil
Feedback from John, 'seen' is a set().
256
            seen.add(node_key)
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
257
            node = nodes[node_key]
258
            if node.gdfo <= min_gdfo:
259
                continue
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
260
            if node.parent_keys:
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
261
                pending.extend(node.parent_keys)
4371.4.4 by Vincent Ladeuil
Feedback from John, 'seen' is a set().
262
        heads = heads_key.difference(seen)
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
263
        if self.do_cache:
264
            self._known_heads[heads_key] = heads
265
        return heads
266
4593.5.2 by John Arbash Meinel
Implement a very basic version of KnownGraph.topo_sort that just
267
    def topo_sort(self):
4593.5.30 by John Arbash Meinel
Move the topo_sort implementation into KnownGraph, rather than calling back to it.
268
        """Return the nodes in topological order.
269
270
        All parents must occur before all children.
271
        """
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
272
        for node in viewvalues(self._nodes):
4593.5.30 by John Arbash Meinel
Move the topo_sort implementation into KnownGraph, rather than calling back to it.
273
            if node.gdfo is None:
274
                raise errors.GraphCycleError(self._nodes)
275
        pending = self._find_tails()
276
        pending_pop = pending.pop
277
        pending_append = pending.append
278
279
        topo_order = []
280
        topo_order_append = topo_order.append
281
282
        num_seen_parents = dict.fromkeys(self._nodes, 0)
283
        while pending:
284
            node = pending_pop()
285
            if node.parent_keys is not None:
286
                # We don't include ghost parents
287
                topo_order_append(node.key)
288
            for child_key in node.child_keys:
289
                child_node = self._nodes[child_key]
290
                seen_parents = num_seen_parents[child_key] + 1
291
                if seen_parents == len(child_node.parent_keys):
292
                    # All parents have been processed, enqueue this child
293
                    pending_append(child_node)
294
                    # This has been queued up, stop tracking it
295
                    del num_seen_parents[child_key]
296
                else:
297
                    num_seen_parents[child_key] = seen_parents
298
        # We started from the parents, so we don't need to do anymore work
299
        return topo_order
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
300
4641.5.2 by John Arbash Meinel
Get ready to move the tests into KnownGraph implementation tests.
301
    def gc_sort(self):
302
        """Return a reverse topological ordering which is 'stable'.
303
304
        There are a few constraints:
305
          1) Reverse topological (all children before all parents)
306
          2) Grouped by prefix
307
          3) 'stable' sorting, so that we get the same result, independent of
308
             machine, or extra data.
309
        To do this, we use the same basic algorithm as topo_sort, but when we
310
        aren't sure what node to access next, we sort them lexicographically.
311
        """
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
312
        tips = self._find_tips()
313
        # Split the tips based on prefix
314
        prefix_tips = {}
315
        for node in tips:
316
            if node.key.__class__ is str or len(node.key) == 1:
317
                prefix = ''
318
            else:
319
                prefix = node.key[0]
320
            prefix_tips.setdefault(prefix, []).append(node)
321
322
        num_seen_children = dict.fromkeys(self._nodes, 0)
323
324
        result = []
325
        for prefix in sorted(prefix_tips):
7143.15.2 by Jelmer Vernooij
Run autopep8.
326
            pending = sorted(prefix_tips[prefix], key=lambda n: n.key,
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
327
                             reverse=True)
328
            while pending:
329
                node = pending.pop()
4641.5.6 by John Arbash Meinel
Add support for skipping ghost nodes.
330
                if node.parent_keys is None:
331
                    # Ghost node, skip it
332
                    continue
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
333
                result.append(node.key)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
334
                for parent_key in sorted(node.parent_keys, reverse=True):
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
335
                    parent_node = self._nodes[parent_key]
336
                    seen_children = num_seen_children[parent_key] + 1
337
                    if seen_children == len(parent_node.child_keys):
338
                        # All children have been processed, enqueue this parent
339
                        pending.append(parent_node)
340
                        # This has been queued up, stop tracking it
341
                        del num_seen_children[parent_key]
342
                    else:
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
343
                        num_seen_children[parent_key] = seen_children
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
344
        return result
4641.5.2 by John Arbash Meinel
Get ready to move the tests into KnownGraph implementation tests.
345
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
346
    def merge_sort(self, tip_key):
347
        """Compute the merge sorted graph output."""
6622.1.34 by Jelmer Vernooij
Rename brzlib => breezy.
348
        from breezy import tsort
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
349
        as_parent_map = dict((node.key, node.parent_keys)
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
350
                             for node in viewvalues(self._nodes)
7143.15.2 by Jelmer Vernooij
Run autopep8.
351
                             if node.parent_keys is not None)
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
352
        # We intentionally always generate revnos and never force the
353
        # mainline_revisions
4593.5.32 by John Arbash Meinel
With the new api, we don't return 'sequence_number' anymore.
354
        # Strip the sequence_number that merge_sort generates
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
355
        return [_MergeSortNode(key, merge_depth, revno, end_of_merge)
356
                for _, key, merge_depth, revno, end_of_merge
7143.15.2 by Jelmer Vernooij
Run autopep8.
357
                in tsort.merge_sort(as_parent_map, tip_key,
358
                                    mainline_revisions=None,
359
                                    generate_revno=True)]
360
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
361
    def get_parent_keys(self, key):
362
        """Get the parents for a key
7143.15.2 by Jelmer Vernooij
Run autopep8.
363
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
364
        Returns a list containg the parents keys. If the key is a ghost,
365
        None is returned. A KeyError will be raised if the key is not in
366
        the graph.
7143.15.2 by Jelmer Vernooij
Run autopep8.
367
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
368
        :param keys: Key to check (eg revision_id)
369
        :return: A list of parents
370
        """
371
        return self._nodes[key].parent_keys
372
373
    def get_child_keys(self, key):
374
        """Get the children for a key
7143.15.2 by Jelmer Vernooij
Run autopep8.
375
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
376
        Returns a list containg the children keys. A KeyError will be raised
377
        if the key is not in the graph.
7143.15.2 by Jelmer Vernooij
Run autopep8.
378
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
379
        :param keys: Key to check (eg revision_id)
380
        :return: A list of children
381
        """
382
        return self._nodes[key].child_keys