/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2009, 2010 Canonical Ltd
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17
"""Implementation of Graph algorithms when we have already loaded everything.
18
"""
19
6379.6.7 by Jelmer Vernooij
Move importing from future until after doc string, otherwise the doc string will disappear.
20
from __future__ import absolute_import
21
6800.1.3 by Jelmer Vernooij
Avoid modifying possibly lazily imported module.
22
import collections
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
23
from . import (
4593.5.30 by John Arbash Meinel
Move the topo_sort implementation into KnownGraph, rather than calling back to it.
24
    errors,
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
25
    revision,
26
    )
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
27
from .sixish import (
28
    viewitems,
29
    viewvalues,
30
    )
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
31
32
33
class _KnownGraphNode(object):
34
    """Represents a single object in the known graph."""
35
4371.4.10 by Vincent Ladeuil
Cleanup.
36
    __slots__ = ('key', 'parent_keys', 'child_keys', 'gdfo')
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
37
38
    def __init__(self, key, parent_keys):
39
        self.key = key
40
        self.parent_keys = parent_keys
41
        self.child_keys = []
42
        # Greatest distance from origin
43
        self.gdfo = None
44
45
    def __repr__(self):
4371.4.10 by Vincent Ladeuil
Cleanup.
46
        return '%s(%s  gdfo:%s par:%s child:%s)' % (
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
47
            self.__class__.__name__, self.key, self.gdfo,
4371.4.10 by Vincent Ladeuil
Cleanup.
48
            self.parent_keys, self.child_keys)
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
49
50
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
51
class _MergeSortNode(object):
52
    """Information about a specific node in the merge graph."""
53
54
    __slots__ = ('key', 'merge_depth', 'revno', 'end_of_merge')
55
56
    def __init__(self, key, merge_depth, revno, end_of_merge):
57
        self.key = key
58
        self.merge_depth = merge_depth
59
        self.revno = revno
60
        self.end_of_merge = end_of_merge
61
62
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
63
class KnownGraph(object):
64
    """This is a class which assumes we already know the full graph."""
65
66
    def __init__(self, parent_map, do_cache=True):
67
        """Create a new KnownGraph instance.
68
69
        :param parent_map: A dictionary mapping key => parent_keys
70
        """
71
        self._nodes = {}
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
72
        # Maps {frozenset(revision_id, revision_id): heads}
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
73
        self._known_heads = {}
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
74
        self.do_cache = do_cache
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
75
        self._initialize_nodes(parent_map)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
76
        self._find_gdfo()
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
77
78
    def _initialize_nodes(self, parent_map):
79
        """Populate self._nodes.
80
4371.4.9 by Vincent Ladeuil
Simplify gdfo computing by finding tails when at graph build time.
81
        After this has finished:
82
        - self._nodes will have an entry for every entry in parent_map.
83
        - ghosts will have a parent_keys = None,
84
        - all nodes found will also have .child_keys populated with all known
85
          child_keys,
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
86
        """
87
        nodes = self._nodes
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
88
        for key, parent_keys in viewitems(parent_map):
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
89
            if key in nodes:
90
                node = nodes[key]
91
                node.parent_keys = parent_keys
92
            else:
93
                node = _KnownGraphNode(key, parent_keys)
94
                nodes[key] = node
95
            for parent_key in parent_keys:
96
                try:
97
                    parent_node = nodes[parent_key]
98
                except KeyError:
99
                    parent_node = _KnownGraphNode(parent_key, None)
100
                    nodes[parent_key] = parent_node
101
                parent_node.child_keys.append(key)
102
4454.3.78 by John Arbash Meinel
Found a bug in the pure-python KnownGraph code.
103
    def _find_tails(self):
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
104
        return [node for node in viewvalues(self._nodes)
4454.3.78 by John Arbash Meinel
Found a bug in the pure-python KnownGraph code.
105
                if not node.parent_keys]
106
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
107
    def _find_tips(self):
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
108
        return [node for node in viewvalues(self._nodes)
7143.15.2 by Jelmer Vernooij
Run autopep8.
109
                if not node.child_keys]
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
110
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
111
    def _find_gdfo(self):
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
112
        nodes = self._nodes
4371.4.10 by Vincent Ladeuil
Cleanup.
113
        known_parent_gdfos = {}
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
114
        pending = []
115
4454.3.78 by John Arbash Meinel
Found a bug in the pure-python KnownGraph code.
116
        for node in self._find_tails():
4371.4.9 by Vincent Ladeuil
Simplify gdfo computing by finding tails when at graph build time.
117
            node.gdfo = 1
118
            pending.append(node)
119
4371.4.8 by Vincent Ladeuil
Replace the existing KnownGraph._find_gdfo.
120
        while pending:
121
            node = pending.pop()
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
122
            for child_key in node.child_keys:
123
                child = nodes[child_key]
4371.4.19 by John Arbash Meinel
Update the python code to do the same checking around known_parent_gdfos being present.
124
                if child_key in known_parent_gdfos:
125
                    known_gdfo = known_parent_gdfos[child_key] + 1
126
                    present = True
127
                else:
128
                    known_gdfo = 1
129
                    present = False
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
130
                if child.gdfo is None or node.gdfo + 1 > child.gdfo:
131
                    child.gdfo = node.gdfo + 1
4371.4.19 by John Arbash Meinel
Update the python code to do the same checking around known_parent_gdfos being present.
132
                if known_gdfo == len(child.parent_keys):
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
133
                    # We are the last parent updating that node, we can
134
                    # continue from there
4371.4.2 by Vincent Ladeuil
Same gdfo processing, without recursion.
135
                    pending.append(child)
4371.4.19 by John Arbash Meinel
Update the python code to do the same checking around known_parent_gdfos being present.
136
                    if present:
137
                        del known_parent_gdfos[child_key]
138
                else:
139
                    # Update known_parent_gdfos for a key we couldn't process
140
                    known_parent_gdfos[child_key] = known_gdfo
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
141
4838.1.1 by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version.
142
    def add_node(self, key, parent_keys):
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
143
        """Add a new node to the graph.
144
145
        If this fills in a ghost, then the gdfos of all children will be
146
        updated accordingly.
7143.15.2 by Jelmer Vernooij
Run autopep8.
147
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
148
        :param key: The node being added. If this is a duplicate, this is a
149
            no-op.
150
        :param parent_keys: The parents of the given node.
151
        :return: None (should we return if this was a ghost, etc?)
152
        """
4838.1.1 by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version.
153
        nodes = self._nodes
154
        if key in nodes:
155
            node = nodes[key]
156
            if node.parent_keys is None:
157
                node.parent_keys = parent_keys
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
158
                # A ghost is being added, we can no-longer trust the heads
159
                # cache, so clear it
160
                self._known_heads.clear()
4838.1.1 by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version.
161
            else:
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
162
                # Make sure we compare a list to a list, as tuple != list.
163
                parent_keys = list(parent_keys)
164
                existing_parent_keys = list(node.parent_keys)
165
                if parent_keys == existing_parent_keys:
7143.15.2 by Jelmer Vernooij
Run autopep8.
166
                    return  # Identical content
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
167
                else:
7143.15.5 by Jelmer Vernooij
More PEP8 fixes.
168
                    raise ValueError(
169
                        'Parent key mismatch, existing node %s'
170
                        ' has parents of %s not %s'
171
                        % (key, existing_parent_keys, parent_keys))
4838.1.1 by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version.
172
        else:
173
            node = _KnownGraphNode(key, parent_keys)
174
            nodes[key] = node
175
        parent_gdfo = 0
176
        for parent_key in parent_keys:
177
            try:
178
                parent_node = nodes[parent_key]
179
            except KeyError:
180
                parent_node = _KnownGraphNode(parent_key, None)
181
                # Ghosts and roots have gdfo 1
182
                parent_node.gdfo = 1
183
                nodes[parent_key] = parent_node
184
            if parent_gdfo < parent_node.gdfo:
185
                parent_gdfo = parent_node.gdfo
186
            parent_node.child_keys.append(key)
187
        node.gdfo = parent_gdfo + 1
188
        # Now fill the gdfo to all children
189
        # Note that this loop is slightly inefficient, in that we may visit the
190
        # same child (and its decendents) more than once, however, it is
191
        # 'efficient' in that we only walk to nodes that would be updated,
192
        # rather than all nodes
193
        # We use a deque rather than a simple list stack, to go for BFD rather
194
        # than DFD. So that if a longer path is possible, we walk it before we
195
        # get to the final child
6800.1.3 by Jelmer Vernooij
Avoid modifying possibly lazily imported module.
196
        pending = collections.deque([node])
4838.1.1 by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version.
197
        while pending:
198
            node = pending.popleft()
199
            next_gdfo = node.gdfo + 1
200
            for child_key in node.child_keys:
201
                child = nodes[child_key]
202
                if child.gdfo < next_gdfo:
203
                    # This child is being updated, we need to check its
204
                    # children
205
                    child.gdfo = next_gdfo
206
                    pending.append(child)
207
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
208
    def heads(self, keys):
209
        """Return the heads from amongst keys.
210
211
        This is done by searching the ancestries of each key.  Any key that is
212
        reachable from another key is not returned; all the others are.
213
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
214
        This operation scales with the relative depth between any two keys. It
215
        uses gdfo to avoid walking all ancestry.
216
217
        :param keys: An iterable of keys.
218
        :return: A set of the heads. Note that as a set there is no ordering
219
            information. Callers will need to filter their input to create
220
            order if they need it.
221
        """
222
        candidate_nodes = dict((key, self._nodes[key]) for key in keys)
223
        if revision.NULL_REVISION in candidate_nodes:
224
            # NULL_REVISION is only a head if it is the only entry
225
            candidate_nodes.pop(revision.NULL_REVISION)
226
            if not candidate_nodes:
227
                return frozenset([revision.NULL_REVISION])
228
        if len(candidate_nodes) < 2:
229
            # No or only one candidate
230
            return frozenset(candidate_nodes)
231
        heads_key = frozenset(candidate_nodes)
232
        # Do we have a cached result ?
233
        try:
234
            heads = self._known_heads[heads_key]
235
            return heads
236
        except KeyError:
237
            pass
238
        # Let's compute the heads
4371.4.4 by Vincent Ladeuil
Feedback from John, 'seen' is a set().
239
        seen = set()
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
240
        pending = []
241
        min_gdfo = None
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
242
        for node in viewvalues(candidate_nodes):
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
243
            if node.parent_keys:
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
244
                pending.extend(node.parent_keys)
245
            if min_gdfo is None or node.gdfo < min_gdfo:
246
                min_gdfo = node.gdfo
247
        nodes = self._nodes
248
        while pending:
249
            node_key = pending.pop()
250
            if node_key in seen:
251
                # node already appears in some ancestry
252
                continue
4371.4.4 by Vincent Ladeuil
Feedback from John, 'seen' is a set().
253
            seen.add(node_key)
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
254
            node = nodes[node_key]
255
            if node.gdfo <= min_gdfo:
256
                continue
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
257
            if node.parent_keys:
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
258
                pending.extend(node.parent_keys)
4371.4.4 by Vincent Ladeuil
Feedback from John, 'seen' is a set().
259
        heads = heads_key.difference(seen)
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
260
        if self.do_cache:
261
            self._known_heads[heads_key] = heads
262
        return heads
263
4593.5.2 by John Arbash Meinel
Implement a very basic version of KnownGraph.topo_sort that just
264
    def topo_sort(self):
4593.5.30 by John Arbash Meinel
Move the topo_sort implementation into KnownGraph, rather than calling back to it.
265
        """Return the nodes in topological order.
266
267
        All parents must occur before all children.
268
        """
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
269
        for node in viewvalues(self._nodes):
4593.5.30 by John Arbash Meinel
Move the topo_sort implementation into KnownGraph, rather than calling back to it.
270
            if node.gdfo is None:
271
                raise errors.GraphCycleError(self._nodes)
272
        pending = self._find_tails()
273
        pending_pop = pending.pop
274
        pending_append = pending.append
275
276
        topo_order = []
277
        topo_order_append = topo_order.append
278
279
        num_seen_parents = dict.fromkeys(self._nodes, 0)
280
        while pending:
281
            node = pending_pop()
282
            if node.parent_keys is not None:
283
                # We don't include ghost parents
284
                topo_order_append(node.key)
285
            for child_key in node.child_keys:
286
                child_node = self._nodes[child_key]
287
                seen_parents = num_seen_parents[child_key] + 1
288
                if seen_parents == len(child_node.parent_keys):
289
                    # All parents have been processed, enqueue this child
290
                    pending_append(child_node)
291
                    # This has been queued up, stop tracking it
292
                    del num_seen_parents[child_key]
293
                else:
294
                    num_seen_parents[child_key] = seen_parents
295
        # We started from the parents, so we don't need to do anymore work
296
        return topo_order
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
297
4641.5.2 by John Arbash Meinel
Get ready to move the tests into KnownGraph implementation tests.
298
    def gc_sort(self):
299
        """Return a reverse topological ordering which is 'stable'.
300
301
        There are a few constraints:
302
          1) Reverse topological (all children before all parents)
303
          2) Grouped by prefix
304
          3) 'stable' sorting, so that we get the same result, independent of
305
             machine, or extra data.
306
        To do this, we use the same basic algorithm as topo_sort, but when we
307
        aren't sure what node to access next, we sort them lexicographically.
308
        """
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
309
        tips = self._find_tips()
310
        # Split the tips based on prefix
311
        prefix_tips = {}
312
        for node in tips:
313
            if node.key.__class__ is str or len(node.key) == 1:
314
                prefix = ''
315
            else:
316
                prefix = node.key[0]
317
            prefix_tips.setdefault(prefix, []).append(node)
318
319
        num_seen_children = dict.fromkeys(self._nodes, 0)
320
321
        result = []
322
        for prefix in sorted(prefix_tips):
7143.15.2 by Jelmer Vernooij
Run autopep8.
323
            pending = sorted(prefix_tips[prefix], key=lambda n: n.key,
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
324
                             reverse=True)
325
            while pending:
326
                node = pending.pop()
4641.5.6 by John Arbash Meinel
Add support for skipping ghost nodes.
327
                if node.parent_keys is None:
328
                    # Ghost node, skip it
329
                    continue
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
330
                result.append(node.key)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
331
                for parent_key in sorted(node.parent_keys, reverse=True):
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
332
                    parent_node = self._nodes[parent_key]
333
                    seen_children = num_seen_children[parent_key] + 1
334
                    if seen_children == len(parent_node.child_keys):
335
                        # All children have been processed, enqueue this parent
336
                        pending.append(parent_node)
337
                        # This has been queued up, stop tracking it
338
                        del num_seen_children[parent_key]
339
                    else:
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
340
                        num_seen_children[parent_key] = seen_children
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
341
        return result
4641.5.2 by John Arbash Meinel
Get ready to move the tests into KnownGraph implementation tests.
342
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
343
    def merge_sort(self, tip_key):
344
        """Compute the merge sorted graph output."""
6622.1.34 by Jelmer Vernooij
Rename brzlib => breezy.
345
        from breezy import tsort
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
346
        as_parent_map = dict((node.key, node.parent_keys)
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
347
                             for node in viewvalues(self._nodes)
7143.15.2 by Jelmer Vernooij
Run autopep8.
348
                             if node.parent_keys is not None)
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
349
        # We intentionally always generate revnos and never force the
350
        # mainline_revisions
4593.5.32 by John Arbash Meinel
With the new api, we don't return 'sequence_number' anymore.
351
        # Strip the sequence_number that merge_sort generates
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
352
        return [_MergeSortNode(key, merge_depth, revno, end_of_merge)
353
                for _, key, merge_depth, revno, end_of_merge
7143.15.2 by Jelmer Vernooij
Run autopep8.
354
                in tsort.merge_sort(as_parent_map, tip_key,
355
                                    mainline_revisions=None,
356
                                    generate_revno=True)]
357
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
358
    def get_parent_keys(self, key):
359
        """Get the parents for a key
7143.15.2 by Jelmer Vernooij
Run autopep8.
360
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
361
        Returns a list containg the parents keys. If the key is a ghost,
362
        None is returned. A KeyError will be raised if the key is not in
363
        the graph.
7143.15.2 by Jelmer Vernooij
Run autopep8.
364
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
365
        :param keys: Key to check (eg revision_id)
366
        :return: A list of parents
367
        """
368
        return self._nodes[key].parent_keys
369
370
    def get_child_keys(self, key):
371
        """Get the children for a key
7143.15.2 by Jelmer Vernooij
Run autopep8.
372
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
373
        Returns a list containg the children keys. A KeyError will be raised
374
        if the key is not in the graph.
7143.15.2 by Jelmer Vernooij
Run autopep8.
375
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
376
        :param keys: Key to check (eg revision_id)
377
        :return: A list of children
378
        """
379
        return self._nodes[key].child_keys