/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
5273.1.5 by Vincent Ladeuil
Merge bzr.dev into cleanup
1
# Copyright (C) 2009, 2010 Canonical Ltd
4454.3.1 by John Arbash Meinel
Initial api for Annotator.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
6379.6.7 by Jelmer Vernooij
Move importing from future until after doc string, otherwise the doc string will disappear.
17
"""Functionality for doing annotations in the 'optimal' way"""
18
6379.6.1 by Jelmer Vernooij
Import absolute_import in a few places.
19
from __future__ import absolute_import
20
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
21
from .lazy_import import lazy_import
4454.3.77 by John Arbash Meinel
Add support for compatibility with old '_break_annotation_tie' function.
22
lazy_import(globals(), """
6622.1.34 by Jelmer Vernooij
Rename brzlib => breezy.
23
from breezy import (
5279.1.1 by Andrew Bennetts
lazy_import most things in merge.py; add a few representative modules to the import tariff tests; tweak a couple of other modules so that patiencediff is not necessarily imported; remove a bunch of unused imports from test_knit.py.
24
    annotate, # Must be lazy to avoid circular importing
25
    graph as _mod_graph,
26
    patiencediff,
27
    )
4454.3.77 by John Arbash Meinel
Add support for compatibility with old '_break_annotation_tie' function.
28
""")
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
29
from . import (
4454.3.1 by John Arbash Meinel
Initial api for Annotator.
30
    errors,
31
    osutils,
4454.3.21 by John Arbash Meinel
Assert that entries in the annotation cache also get cleaned up.
32
    ui,
4454.3.1 by John Arbash Meinel
Initial api for Annotator.
33
    )
6651.2.2 by Martin
Apply 2to3 xrange fix and fix up with sixish range
34
from .sixish import (
35
    range,
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
36
    viewitems,
6651.2.2 by Martin
Apply 2to3 xrange fix and fix up with sixish range
37
    )
4454.3.1 by John Arbash Meinel
Initial api for Annotator.
38
39
40
class Annotator(object):
41
    """Class that drives performing annotations."""
42
43
    def __init__(self, vf):
44
        """Create a new Annotator from a VersionedFile."""
45
        self._vf = vf
4454.3.2 by John Arbash Meinel
Start moving bits into helper functions. Add tests for multiple revs.
46
        self._parent_map = {}
4454.3.8 by John Arbash Meinel
Factor out the 'get the lines to annotate' into a helper.
47
        self._text_cache = {}
4454.3.18 by John Arbash Meinel
Start tracking the number of children that need a given text.
48
        # Map from key => number of nexts that will be built from this key
49
        self._num_needed_children = {}
4454.3.3 by John Arbash Meinel
Start implementing the reannotation functionality directly.
50
        self._annotations_cache = {}
4454.3.41 by John Arbash Meinel
Cache the heads provider as long as we know that the parent_map hasn't changed.
51
        self._heads_provider = None
4454.3.73 by John Arbash Meinel
inherit from _annotator_py.Annotator in _annotator_pyx.Annotator.
52
        self._ann_tuple_cache = {}
4454.3.1 by John Arbash Meinel
Initial api for Annotator.
53
4454.3.61 by John Arbash Meinel
Start implementing an Annotator.add_special_text functionality.
54
    def _update_needed_children(self, key, parent_keys):
55
        for parent_key in parent_keys:
56
            if parent_key in self._num_needed_children:
57
                self._num_needed_children[parent_key] += 1
58
            else:
59
                self._num_needed_children[parent_key] = 1
60
4454.3.18 by John Arbash Meinel
Start tracking the number of children that need a given text.
61
    def _get_needed_keys(self, key):
4454.3.61 by John Arbash Meinel
Start implementing an Annotator.add_special_text functionality.
62
        """Determine the texts we need to get from the backing vf.
63
64
        :return: (vf_keys_needed, ann_keys_needed)
65
            vf_keys_needed  These are keys that we need to get from the vf
66
            ann_keys_needed Texts which we have in self._text_cache but we
67
                            don't have annotations for. We need to yield these
68
                            in the proper order so that we can get proper
69
                            annotations.
70
        """
71
        parent_map = self._parent_map
4454.3.18 by John Arbash Meinel
Start tracking the number of children that need a given text.
72
        # We need 1 extra copy of the node we will be looking at when we are
73
        # done
74
        self._num_needed_children[key] = 1
4454.3.61 by John Arbash Meinel
Start implementing an Annotator.add_special_text functionality.
75
        vf_keys_needed = set()
76
        ann_keys_needed = set()
6619.3.12 by Jelmer Vernooij
Use 2to3 set_literal fixer.
77
        needed_keys = {key}
4454.3.61 by John Arbash Meinel
Start implementing an Annotator.add_special_text functionality.
78
        while needed_keys:
79
            parent_lookup = []
80
            next_parent_map = {}
81
            for key in needed_keys:
82
                if key in self._parent_map:
83
                    # We don't need to lookup this key in the vf
84
                    if key not in self._text_cache:
85
                        # Extract this text from the vf
86
                        vf_keys_needed.add(key)
87
                    elif key not in self._annotations_cache:
88
                        # We do need to annotate
89
                        ann_keys_needed.add(key)
90
                        next_parent_map[key] = self._parent_map[key]
4454.3.18 by John Arbash Meinel
Start tracking the number of children that need a given text.
91
                else:
4454.3.61 by John Arbash Meinel
Start implementing an Annotator.add_special_text functionality.
92
                    parent_lookup.append(key)
93
                    vf_keys_needed.add(key)
94
            needed_keys = set()
95
            next_parent_map.update(self._vf.get_parent_map(parent_lookup))
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
96
            for key, parent_keys in viewitems(next_parent_map):
7143.15.1 by Jelmer Vernooij
Fix style issues.
97
                if parent_keys is None:  # No graph versionedfile
4454.3.66 by John Arbash Meinel
Implement no-graph support for the Python version.
98
                    parent_keys = ()
99
                    next_parent_map[key] = ()
4454.3.61 by John Arbash Meinel
Start implementing an Annotator.add_special_text functionality.
100
                self._update_needed_children(key, parent_keys)
101
                needed_keys.update([key for key in parent_keys
7143.15.1 by Jelmer Vernooij
Fix style issues.
102
                                    if key not in parent_map])
4454.3.61 by John Arbash Meinel
Start implementing an Annotator.add_special_text functionality.
103
            parent_map.update(next_parent_map)
7143.15.1 by Jelmer Vernooij
Fix style issues.
104
            # _heads_provider does some graph caching, so it is only valid
105
            # while self._parent_map hasn't changed
4454.3.61 by John Arbash Meinel
Start implementing an Annotator.add_special_text functionality.
106
            self._heads_provider = None
107
        return vf_keys_needed, ann_keys_needed
4454.3.18 by John Arbash Meinel
Start tracking the number of children that need a given text.
108
4454.3.21 by John Arbash Meinel
Assert that entries in the annotation cache also get cleaned up.
109
    def _get_needed_texts(self, key, pb=None):
4454.3.8 by John Arbash Meinel
Factor out the 'get the lines to annotate' into a helper.
110
        """Get the texts we need to properly annotate key.
111
112
        :param key: A Key that is present in self._vf
113
        :return: Yield (this_key, text, num_lines)
114
            'text' is an opaque object that just has to work with whatever
115
            matcher object we are using. Currently it is always 'lines' but
116
            future improvements may change this to a simple text string.
117
        """
4454.3.61 by John Arbash Meinel
Start implementing an Annotator.add_special_text functionality.
118
        keys, ann_keys = self._get_needed_keys(key)
4454.3.21 by John Arbash Meinel
Assert that entries in the annotation cache also get cleaned up.
119
        if pb is not None:
120
            pb.update('getting stream', 0, len(keys))
7143.15.1 by Jelmer Vernooij
Fix style issues.
121
        stream = self._vf.get_record_stream(keys, 'topological', True)
4454.3.21 by John Arbash Meinel
Assert that entries in the annotation cache also get cleaned up.
122
        for idx, record in enumerate(stream):
123
            if pb is not None:
124
                pb.update('extracting', 0, len(keys))
4454.3.61 by John Arbash Meinel
Start implementing an Annotator.add_special_text functionality.
125
            if record.storage_kind == 'absent':
126
                raise errors.RevisionNotPresent(record.key, self._vf)
4454.3.8 by John Arbash Meinel
Factor out the 'get the lines to annotate' into a helper.
127
            this_key = record.key
128
            lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
129
            num_lines = len(lines)
4454.3.16 by John Arbash Meinel
Move more access patterns into helper functions.
130
            self._text_cache[this_key] = lines
4454.3.8 by John Arbash Meinel
Factor out the 'get the lines to annotate' into a helper.
131
            yield this_key, lines, num_lines
4454.3.61 by John Arbash Meinel
Start implementing an Annotator.add_special_text functionality.
132
        for key in ann_keys:
133
            lines = self._text_cache[key]
134
            num_lines = len(lines)
135
            yield key, lines, num_lines
4454.3.2 by John Arbash Meinel
Start moving bits into helper functions. Add tests for multiple revs.
136
4454.3.38 by John Arbash Meinel
Start using left-matching-blocks during the actual annotation.
137
    def _get_parent_annotations_and_matches(self, key, text, parent_key):
4454.3.9 by John Arbash Meinel
Remove heads_provider, as we don't use it now.
138
        """Get the list of annotations for the parent, and the matching lines.
4454.3.2 by John Arbash Meinel
Start moving bits into helper functions. Add tests for multiple revs.
139
4454.3.9 by John Arbash Meinel
Remove heads_provider, as we don't use it now.
140
        :param text: The opaque value given by _get_needed_texts
141
        :param parent_key: The key for the parent text
142
        :return: (parent_annotations, matching_blocks)
143
            parent_annotations is a list as long as the number of lines in
144
                parent
145
            matching_blocks is a list of (parent_idx, text_idx, len) tuples
146
                indicating which lines match between the two texts
147
        """
4454.3.8 by John Arbash Meinel
Factor out the 'get the lines to annotate' into a helper.
148
        parent_lines = self._text_cache[parent_key]
4454.3.3 by John Arbash Meinel
Start implementing the reannotation functionality directly.
149
        parent_annotations = self._annotations_cache[parent_key]
150
        # PatienceSequenceMatcher should probably be part of Policy
7143.15.1 by Jelmer Vernooij
Fix style issues.
151
        matcher = patiencediff.PatienceSequenceMatcher(
152
            None, parent_lines, text)
4454.3.3 by John Arbash Meinel
Start implementing the reannotation functionality directly.
153
        matching_blocks = matcher.get_matching_blocks()
4454.3.4 by John Arbash Meinel
New work on how to resolve conflict lines.
154
        return parent_annotations, matching_blocks
155
4454.3.73 by John Arbash Meinel
inherit from _annotator_py.Annotator in _annotator_pyx.Annotator.
156
    def _update_from_first_parent(self, key, annotations, lines, parent_key):
4454.3.4 by John Arbash Meinel
New work on how to resolve conflict lines.
157
        """Reannotate this text relative to its first parent."""
4454.3.75 by John Arbash Meinel
Move the core loops into module-level helpers.
158
        (parent_annotations,
159
         matching_blocks) = self._get_parent_annotations_and_matches(
7143.15.1 by Jelmer Vernooij
Fix style issues.
160
             key, lines, parent_key)
4454.3.3 by John Arbash Meinel
Start implementing the reannotation functionality directly.
161
162
        for parent_idx, lines_idx, match_len in matching_blocks:
163
            # For all matching regions we copy across the parent annotations
164
            annotations[lines_idx:lines_idx + match_len] = \
165
                parent_annotations[parent_idx:parent_idx + match_len]
166
4454.3.38 by John Arbash Meinel
Start using left-matching-blocks during the actual annotation.
167
    def _update_from_other_parents(self, key, annotations, lines,
168
                                   this_annotation, parent_key):
4454.3.4 by John Arbash Meinel
New work on how to resolve conflict lines.
169
        """Reannotate this text relative to a second (or more) parent."""
4454.3.75 by John Arbash Meinel
Move the core loops into module-level helpers.
170
        (parent_annotations,
171
         matching_blocks) = self._get_parent_annotations_and_matches(
7143.15.1 by Jelmer Vernooij
Fix style issues.
172
             key, lines, parent_key)
4454.3.4 by John Arbash Meinel
New work on how to resolve conflict lines.
173
4454.3.6 by John Arbash Meinel
Adding a trivial 'last_entry' cache drops the time from 56s down to 40s
174
        last_ann = None
175
        last_parent = None
176
        last_res = None
4454.3.7 by John Arbash Meinel
Some minor changes
177
        # TODO: consider making all annotations unique and then using 'is'
178
        #       everywhere. Current results claim that isn't any faster,
179
        #       because of the time spent deduping
4454.3.21 by John Arbash Meinel
Assert that entries in the annotation cache also get cleaned up.
180
        #       deduping also saves a bit of memory. For NEWS it saves ~1MB,
181
        #       but that is out of 200-300MB for extracting everything, so a
182
        #       fairly trivial amount
4454.3.4 by John Arbash Meinel
New work on how to resolve conflict lines.
183
        for parent_idx, lines_idx, match_len in matching_blocks:
184
            # For lines which match this parent, we will now resolve whether
185
            # this parent wins over the current annotation
4454.3.40 by John Arbash Meinel
Shave a bit more time off by using subset matching to skip whole regions.
186
            ann_sub = annotations[lines_idx:lines_idx + match_len]
187
            par_sub = parent_annotations[parent_idx:parent_idx + match_len]
188
            if ann_sub == par_sub:
189
                continue
6651.2.2 by Martin
Apply 2to3 xrange fix and fix up with sixish range
190
            for idx in range(match_len):
4454.3.40 by John Arbash Meinel
Shave a bit more time off by using subset matching to skip whole regions.
191
                ann = ann_sub[idx]
192
                par_ann = par_sub[idx]
4454.3.4 by John Arbash Meinel
New work on how to resolve conflict lines.
193
                ann_idx = lines_idx + idx
194
                if ann == par_ann:
195
                    # Nothing to change
196
                    continue
4454.3.7 by John Arbash Meinel
Some minor changes
197
                if ann == this_annotation:
4454.3.4 by John Arbash Meinel
New work on how to resolve conflict lines.
198
                    # Originally claimed 'this', but it was really in this
199
                    # parent
200
                    annotations[ann_idx] = par_ann
201
                    continue
4454.3.7 by John Arbash Meinel
Some minor changes
202
                # Resolve the fact that both sides have a different value for
203
                # last modified
4454.3.6 by John Arbash Meinel
Adding a trivial 'last_entry' cache drops the time from 56s down to 40s
204
                if ann == last_ann and par_ann == last_parent:
205
                    annotations[ann_idx] = last_res
206
                else:
207
                    new_ann = set(ann)
208
                    new_ann.update(par_ann)
209
                    new_ann = tuple(sorted(new_ann))
210
                    annotations[ann_idx] = new_ann
211
                    last_ann = ann
212
                    last_parent = par_ann
213
                    last_res = new_ann
4454.3.4 by John Arbash Meinel
New work on how to resolve conflict lines.
214
4454.3.19 by John Arbash Meinel
Have _record_annotation start to remove texts when they are no longer needed.
215
    def _record_annotation(self, key, parent_keys, annotations):
4454.3.16 by John Arbash Meinel
Move more access patterns into helper functions.
216
        self._annotations_cache[key] = annotations
4454.3.19 by John Arbash Meinel
Have _record_annotation start to remove texts when they are no longer needed.
217
        for parent_key in parent_keys:
218
            num = self._num_needed_children[parent_key]
219
            num -= 1
220
            if num == 0:
221
                del self._text_cache[parent_key]
4454.3.21 by John Arbash Meinel
Assert that entries in the annotation cache also get cleaned up.
222
                del self._annotations_cache[parent_key]
4454.3.19 by John Arbash Meinel
Have _record_annotation start to remove texts when they are no longer needed.
223
                # Do we want to clean up _num_needed_children at this point as
224
                # well?
225
            self._num_needed_children[parent_key] = num
4454.3.16 by John Arbash Meinel
Move more access patterns into helper functions.
226
4454.3.22 by John Arbash Meinel
Need to record the other annotations before we can record this,
227
    def _annotate_one(self, key, text, num_lines):
228
        this_annotation = (key,)
229
        # Note: annotations will be mutated by calls to _update_from*
230
        annotations = [this_annotation] * num_lines
231
        parent_keys = self._parent_map[key]
232
        if parent_keys:
4454.3.73 by John Arbash Meinel
inherit from _annotator_py.Annotator in _annotator_pyx.Annotator.
233
            self._update_from_first_parent(key, annotations, text,
234
                                           parent_keys[0])
4454.3.22 by John Arbash Meinel
Need to record the other annotations before we can record this,
235
            for parent in parent_keys[1:]:
4454.3.38 by John Arbash Meinel
Start using left-matching-blocks during the actual annotation.
236
                self._update_from_other_parents(key, annotations, text,
4454.3.22 by John Arbash Meinel
Need to record the other annotations before we can record this,
237
                                                this_annotation, parent)
238
        self._record_annotation(key, parent_keys, annotations)
239
4454.3.61 by John Arbash Meinel
Start implementing an Annotator.add_special_text functionality.
240
    def add_special_text(self, key, parent_keys, text):
4454.3.74 by John Arbash Meinel
Some small tweaks, add more documentation for 'add_special_text'.
241
        """Add a specific text to the graph.
242
243
        This is used to add a text which is not otherwise present in the
244
        versioned file. (eg. a WorkingTree injecting 'current:' into the
245
        graph to annotate the edited content.)
246
247
        :param key: The key to use to request this text be annotated
248
        :param parent_keys: The parents of this text
249
        :param text: A string containing the content of the text
250
        """
4454.3.61 by John Arbash Meinel
Start implementing an Annotator.add_special_text functionality.
251
        self._parent_map[key] = parent_keys
252
        self._text_cache[key] = osutils.split_lines(text)
253
        self._heads_provider = None
254
4454.3.2 by John Arbash Meinel
Start moving bits into helper functions. Add tests for multiple revs.
255
    def annotate(self, key):
4454.3.75 by John Arbash Meinel
Move the core loops into module-level helpers.
256
        """Return annotated fulltext for the given key.
257
258
        :param key: A tuple defining the text to annotate
259
        :return: ([annotations], [lines])
260
            annotations is a list of tuples of keys, one for each line in lines
261
                        each key is a possible source for the given line.
262
            lines the text of "key" as a list of lines
263
        """
6861.4.1 by Jelmer Vernooij
Make progress bars context managers.
264
        with ui.ui_factory.nested_progress_bar() as pb:
7143.15.1 by Jelmer Vernooij
Fix style issues.
265
            for text_key, text, num_lines in self._get_needed_texts(
266
                    key, pb=pb):
4454.3.22 by John Arbash Meinel
Need to record the other annotations before we can record this,
267
                self._annotate_one(text_key, text, num_lines)
4454.3.1 by John Arbash Meinel
Initial api for Annotator.
268
        try:
4454.3.3 by John Arbash Meinel
Start implementing the reannotation functionality directly.
269
            annotations = self._annotations_cache[key]
270
        except KeyError:
4454.3.1 by John Arbash Meinel
Initial api for Annotator.
271
            raise errors.RevisionNotPresent(key, self._vf)
4454.3.8 by John Arbash Meinel
Factor out the 'get the lines to annotate' into a helper.
272
        return annotations, self._text_cache[key]
4454.3.10 by John Arbash Meinel
Start working on 'annotate_flat' which conforms to the original spec.
273
4454.3.41 by John Arbash Meinel
Cache the heads provider as long as we know that the parent_map hasn't changed.
274
    def _get_heads_provider(self):
275
        if self._heads_provider is None:
276
            self._heads_provider = _mod_graph.KnownGraph(self._parent_map)
277
        return self._heads_provider
278
4454.3.77 by John Arbash Meinel
Add support for compatibility with old '_break_annotation_tie' function.
279
    def _resolve_annotation_tie(self, the_heads, line, tiebreaker):
280
        if tiebreaker is None:
281
            head = sorted(the_heads)[0]
282
        else:
283
            # Backwards compatibility, break up the heads into pairs and
284
            # resolve the result
285
            next_head = iter(the_heads)
6634.2.1 by Martin
Apply 2to3 next fixer and make compatible
286
            head = next(next_head)
4454.3.77 by John Arbash Meinel
Add support for compatibility with old '_break_annotation_tie' function.
287
            for possible_head in next_head:
288
                annotated_lines = ((head, line), (possible_head, line))
289
                head = tiebreaker(annotated_lines)[0]
290
        return head
291
4454.3.10 by John Arbash Meinel
Start working on 'annotate_flat' which conforms to the original spec.
292
    def annotate_flat(self, key):
293
        """Determine the single-best-revision to source for each line.
294
295
        This is meant as a compatibility thunk to how annotate() used to work.
4454.3.75 by John Arbash Meinel
Move the core loops into module-level helpers.
296
        :return: [(ann_key, line)]
297
            A list of tuples with a single annotation key for each line.
4454.3.10 by John Arbash Meinel
Start working on 'annotate_flat' which conforms to the original spec.
298
        """
4454.3.77 by John Arbash Meinel
Add support for compatibility with old '_break_annotation_tie' function.
299
        custom_tiebreaker = annotate._break_annotation_tie
4454.3.10 by John Arbash Meinel
Start working on 'annotate_flat' which conforms to the original spec.
300
        annotations, lines = self.annotate(key)
301
        out = []
4454.3.41 by John Arbash Meinel
Cache the heads provider as long as we know that the parent_map hasn't changed.
302
        heads = self._get_heads_provider().heads
4454.3.13 by John Arbash Meinel
A bit of simplification to the annotate_flat logic.
303
        append = out.append
4454.3.10 by John Arbash Meinel
Start working on 'annotate_flat' which conforms to the original spec.
304
        for annotation, line in zip(annotations, lines):
305
            if len(annotation) == 1:
4454.3.77 by John Arbash Meinel
Add support for compatibility with old '_break_annotation_tie' function.
306
                head = annotation[0]
4454.3.12 by John Arbash Meinel
Finish fleshing out the ability to determine a revision after conflicts.
307
            else:
308
                the_heads = heads(annotation)
309
                if len(the_heads) == 1:
7143.15.1 by Jelmer Vernooij
Fix style issues.
310
                    for head in the_heads:
311
                        break  # get the item out of the set
4454.3.12 by John Arbash Meinel
Finish fleshing out the ability to determine a revision after conflicts.
312
                else:
4454.3.77 by John Arbash Meinel
Add support for compatibility with old '_break_annotation_tie' function.
313
                    head = self._resolve_annotation_tie(the_heads, line,
314
                                                        custom_tiebreaker)
315
            append((head, line))
4454.3.10 by John Arbash Meinel
Start working on 'annotate_flat' which conforms to the original spec.
316
        return out