1
# Copyright (C) 2009, 2010 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Functionality for doing annotations in the 'optimal' way"""
19
from __future__ import absolute_import
21
from .lazy_import import lazy_import
22
lazy_import(globals(), """
27
annotate, # Must be lazy to avoid circular importing
38
class Annotator(object):
39
"""Class that drives performing annotations."""
41
def __init__(self, vf):
42
"""Create a new Annotator from a VersionedFile."""
46
# Map from key => number of nexts that will be built from this key
47
self._num_needed_children = {}
48
self._annotations_cache = {}
49
self._heads_provider = None
50
self._ann_tuple_cache = {}
52
def _update_needed_children(self, key, parent_keys):
53
for parent_key in parent_keys:
54
if parent_key in self._num_needed_children:
55
self._num_needed_children[parent_key] += 1
57
self._num_needed_children[parent_key] = 1
59
def _get_needed_keys(self, key):
60
"""Determine the texts we need to get from the backing vf.
62
:return: (vf_keys_needed, ann_keys_needed)
63
vf_keys_needed These are keys that we need to get from the vf
64
ann_keys_needed Texts which we have in self._text_cache but we
65
don't have annotations for. We need to yield these
66
in the proper order so that we can get proper
69
parent_map = self._parent_map
70
# We need 1 extra copy of the node we will be looking at when we are
72
self._num_needed_children[key] = 1
73
vf_keys_needed = set()
74
ann_keys_needed = set()
79
for key in needed_keys:
80
if key in self._parent_map:
81
# We don't need to lookup this key in the vf
82
if key not in self._text_cache:
83
# Extract this text from the vf
84
vf_keys_needed.add(key)
85
elif key not in self._annotations_cache:
86
# We do need to annotate
87
ann_keys_needed.add(key)
88
next_parent_map[key] = self._parent_map[key]
90
parent_lookup.append(key)
91
vf_keys_needed.add(key)
93
next_parent_map.update(self._vf.get_parent_map(parent_lookup))
94
for key, parent_keys in next_parent_map.items():
95
if parent_keys is None: # No graph versionedfile
97
next_parent_map[key] = ()
98
self._update_needed_children(key, parent_keys)
99
needed_keys.update([key for key in parent_keys
100
if key not in parent_map])
101
parent_map.update(next_parent_map)
102
# _heads_provider does some graph caching, so it is only valid
103
# while self._parent_map hasn't changed
104
self._heads_provider = None
105
return vf_keys_needed, ann_keys_needed
107
def _get_needed_texts(self, key, pb=None):
108
"""Get the texts we need to properly annotate key.
110
:param key: A Key that is present in self._vf
111
:return: Yield (this_key, text, num_lines)
112
'text' is an opaque object that just has to work with whatever
113
matcher object we are using. Currently it is always 'lines' but
114
future improvements may change this to a simple text string.
116
keys, ann_keys = self._get_needed_keys(key)
118
pb.update('getting stream', 0, len(keys))
119
stream = self._vf.get_record_stream(keys, 'topological', True)
120
for idx, record in enumerate(stream):
122
pb.update('extracting', 0, len(keys))
123
if record.storage_kind == 'absent':
124
raise errors.RevisionNotPresent(record.key, self._vf)
125
this_key = record.key
126
lines = record.get_bytes_as('lines')
127
num_lines = len(lines)
128
self._text_cache[this_key] = lines
129
yield this_key, lines, num_lines
131
lines = self._text_cache[key]
132
num_lines = len(lines)
133
yield key, lines, num_lines
135
def _get_parent_annotations_and_matches(self, key, text, parent_key):
136
"""Get the list of annotations for the parent, and the matching lines.
138
:param text: The opaque value given by _get_needed_texts
139
:param parent_key: The key for the parent text
140
:return: (parent_annotations, matching_blocks)
141
parent_annotations is a list as long as the number of lines in
143
matching_blocks is a list of (parent_idx, text_idx, len) tuples
144
indicating which lines match between the two texts
146
parent_lines = self._text_cache[parent_key]
147
parent_annotations = self._annotations_cache[parent_key]
148
# PatienceSequenceMatcher should probably be part of Policy
149
matcher = patiencediff.PatienceSequenceMatcher(
150
None, parent_lines, text)
151
matching_blocks = matcher.get_matching_blocks()
152
return parent_annotations, matching_blocks
154
def _update_from_first_parent(self, key, annotations, lines, parent_key):
155
"""Reannotate this text relative to its first parent."""
157
matching_blocks) = self._get_parent_annotations_and_matches(
158
key, lines, parent_key)
160
for parent_idx, lines_idx, match_len in matching_blocks:
161
# For all matching regions we copy across the parent annotations
162
annotations[lines_idx:lines_idx + match_len] = \
163
parent_annotations[parent_idx:parent_idx + match_len]
165
def _update_from_other_parents(self, key, annotations, lines,
166
this_annotation, parent_key):
167
"""Reannotate this text relative to a second (or more) parent."""
169
matching_blocks) = self._get_parent_annotations_and_matches(
170
key, lines, parent_key)
175
# TODO: consider making all annotations unique and then using 'is'
176
# everywhere. Current results claim that isn't any faster,
177
# because of the time spent deduping
178
# deduping also saves a bit of memory. For NEWS it saves ~1MB,
179
# but that is out of 200-300MB for extracting everything, so a
180
# fairly trivial amount
181
for parent_idx, lines_idx, match_len in matching_blocks:
182
# For lines which match this parent, we will now resolve whether
183
# this parent wins over the current annotation
184
ann_sub = annotations[lines_idx:lines_idx + match_len]
185
par_sub = parent_annotations[parent_idx:parent_idx + match_len]
186
if ann_sub == par_sub:
188
for idx in range(match_len):
190
par_ann = par_sub[idx]
191
ann_idx = lines_idx + idx
195
if ann == this_annotation:
196
# Originally claimed 'this', but it was really in this
198
annotations[ann_idx] = par_ann
200
# Resolve the fact that both sides have a different value for
202
if ann == last_ann and par_ann == last_parent:
203
annotations[ann_idx] = last_res
206
new_ann.update(par_ann)
207
new_ann = tuple(sorted(new_ann))
208
annotations[ann_idx] = new_ann
210
last_parent = par_ann
213
def _record_annotation(self, key, parent_keys, annotations):
214
self._annotations_cache[key] = annotations
215
for parent_key in parent_keys:
216
num = self._num_needed_children[parent_key]
219
del self._text_cache[parent_key]
220
del self._annotations_cache[parent_key]
221
# Do we want to clean up _num_needed_children at this point as
223
self._num_needed_children[parent_key] = num
225
def _annotate_one(self, key, text, num_lines):
226
this_annotation = (key,)
227
# Note: annotations will be mutated by calls to _update_from*
228
annotations = [this_annotation] * num_lines
229
parent_keys = self._parent_map[key]
231
self._update_from_first_parent(key, annotations, text,
233
for parent in parent_keys[1:]:
234
self._update_from_other_parents(key, annotations, text,
235
this_annotation, parent)
236
self._record_annotation(key, parent_keys, annotations)
238
def add_special_text(self, key, parent_keys, text):
239
"""Add a specific text to the graph.
241
This is used to add a text which is not otherwise present in the
242
versioned file. (eg. a WorkingTree injecting 'current:' into the
243
graph to annotate the edited content.)
245
:param key: The key to use to request this text be annotated
246
:param parent_keys: The parents of this text
247
:param text: A string containing the content of the text
249
self._parent_map[key] = parent_keys
250
self._text_cache[key] = osutils.split_lines(text)
251
self._heads_provider = None
253
def annotate(self, key):
254
"""Return annotated fulltext for the given key.
256
:param key: A tuple defining the text to annotate
257
:return: ([annotations], [lines])
258
annotations is a list of tuples of keys, one for each line in lines
259
each key is a possible source for the given line.
260
lines the text of "key" as a list of lines
262
with ui.ui_factory.nested_progress_bar() as pb:
263
for text_key, text, num_lines in self._get_needed_texts(
265
self._annotate_one(text_key, text, num_lines)
267
annotations = self._annotations_cache[key]
269
raise errors.RevisionNotPresent(key, self._vf)
270
return annotations, self._text_cache[key]
272
def _get_heads_provider(self):
273
if self._heads_provider is None:
274
self._heads_provider = _mod_graph.KnownGraph(self._parent_map)
275
return self._heads_provider
277
def _resolve_annotation_tie(self, the_heads, line, tiebreaker):
278
if tiebreaker is None:
279
head = sorted(the_heads)[0]
281
# Backwards compatibility, break up the heads into pairs and
283
next_head = iter(the_heads)
284
head = next(next_head)
285
for possible_head in next_head:
286
annotated_lines = ((head, line), (possible_head, line))
287
head = tiebreaker(annotated_lines)[0]
290
def annotate_flat(self, key):
291
"""Determine the single-best-revision to source for each line.
293
This is meant as a compatibility thunk to how annotate() used to work.
294
:return: [(ann_key, line)]
295
A list of tuples with a single annotation key for each line.
297
custom_tiebreaker = annotate._break_annotation_tie
298
annotations, lines = self.annotate(key)
300
heads = self._get_heads_provider().heads
302
for annotation, line in zip(annotations, lines):
303
if len(annotation) == 1:
306
the_heads = heads(annotation)
307
if len(the_heads) == 1:
308
for head in the_heads:
309
break # get the item out of the set
311
head = self._resolve_annotation_tie(the_heads, line,